Merge tag 'modules-next-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux

Pull module updates from Rusty Russell:
 "Main excitement here is Peter Zijlstra's lockless rbtree optimization
  to speed module address lookup.  He found some abusers of the module
  lock doing that too.

  A little bit of parameter work here too; including Dan Streetman's
  breaking up the big param mutex so writing a parameter can load
  another module (yeah, really).  Unfortunately that broke the usual
  suspects, !CONFIG_MODULES and !CONFIG_SYSFS, so those fixes were
  appended too"

* tag 'modules-next-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux: (26 commits)
  modules: only use mod->param_lock if CONFIG_MODULES
  param: fix module param locks when !CONFIG_SYSFS.
  rcu: merge fix for Convert ACCESS_ONCE() to READ_ONCE() and WRITE_ONCE()
  module: add per-module param_lock
  module: make perm const
  params: suppress unused variable error, warn once just in case code changes.
  modules: clarify CONFIG_MODULE_COMPRESS help, suggest 'N'.
  kernel/module.c: avoid ifdefs for sig_enforce declaration
  kernel/workqueue.c: remove ifdefs over wq_power_efficient
  kernel/params.c: export param_ops_bool_enable_only
  kernel/params.c: generalize bool_enable_only
  kernel/module.c: use generic module param operaters for sig_enforce
  kernel/params: constify struct kernel_param_ops uses
  sysfs: tightened sysfs permission checks
  module: Rework module_addr_{min,max}
  module: Use __module_address() for module_address_lookup()
  module: Make the mod_tree stuff conditional on PERF_EVENTS || TRACING
  module: Optimize __module_address() using a latched RB-tree
  rbtree: Implement generic latch_tree
  seqlock: Introduce raw_read_seqcount_latch()
  ...
This commit is contained in:
Linus Torvalds
2015-07-01 10:49:25 -07:00
48 changed files with 887 additions and 359 deletions

View File

@@ -101,48 +101,201 @@
DEFINE_MUTEX(module_mutex);
EXPORT_SYMBOL_GPL(module_mutex);
static LIST_HEAD(modules);
#ifdef CONFIG_MODULES_TREE_LOOKUP
/*
* Use a latched RB-tree for __module_address(); this allows us to use
* RCU-sched lookups of the address from any context.
*
* Because modules have two address ranges: init and core, we need two
* latch_tree_nodes entries. Therefore we need the back-pointer from
* mod_tree_node.
*
* Because init ranges are short lived we mark them unlikely and have placed
* them outside the critical cacheline in struct module.
*
* This is conditional on PERF_EVENTS || TRACING because those can really hit
* __module_address() hard by doing a lot of stack unwinding; potentially from
* NMI context.
*/
static __always_inline unsigned long __mod_tree_val(struct latch_tree_node *n)
{
struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node);
struct module *mod = mtn->mod;
if (unlikely(mtn == &mod->mtn_init))
return (unsigned long)mod->module_init;
return (unsigned long)mod->module_core;
}
static __always_inline unsigned long __mod_tree_size(struct latch_tree_node *n)
{
struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node);
struct module *mod = mtn->mod;
if (unlikely(mtn == &mod->mtn_init))
return (unsigned long)mod->init_size;
return (unsigned long)mod->core_size;
}
static __always_inline bool
mod_tree_less(struct latch_tree_node *a, struct latch_tree_node *b)
{
return __mod_tree_val(a) < __mod_tree_val(b);
}
static __always_inline int
mod_tree_comp(void *key, struct latch_tree_node *n)
{
unsigned long val = (unsigned long)key;
unsigned long start, end;
start = __mod_tree_val(n);
if (val < start)
return -1;
end = start + __mod_tree_size(n);
if (val >= end)
return 1;
return 0;
}
static const struct latch_tree_ops mod_tree_ops = {
.less = mod_tree_less,
.comp = mod_tree_comp,
};
static struct mod_tree_root {
struct latch_tree_root root;
unsigned long addr_min;
unsigned long addr_max;
} mod_tree __cacheline_aligned = {
.addr_min = -1UL,
};
#define module_addr_min mod_tree.addr_min
#define module_addr_max mod_tree.addr_max
static noinline void __mod_tree_insert(struct mod_tree_node *node)
{
latch_tree_insert(&node->node, &mod_tree.root, &mod_tree_ops);
}
static void __mod_tree_remove(struct mod_tree_node *node)
{
latch_tree_erase(&node->node, &mod_tree.root, &mod_tree_ops);
}
/*
* These modifications: insert, remove_init and remove; are serialized by the
* module_mutex.
*/
static void mod_tree_insert(struct module *mod)
{
mod->mtn_core.mod = mod;
mod->mtn_init.mod = mod;
__mod_tree_insert(&mod->mtn_core);
if (mod->init_size)
__mod_tree_insert(&mod->mtn_init);
}
static void mod_tree_remove_init(struct module *mod)
{
if (mod->init_size)
__mod_tree_remove(&mod->mtn_init);
}
static void mod_tree_remove(struct module *mod)
{
__mod_tree_remove(&mod->mtn_core);
mod_tree_remove_init(mod);
}
static struct module *mod_find(unsigned long addr)
{
struct latch_tree_node *ltn;
ltn = latch_tree_find((void *)addr, &mod_tree.root, &mod_tree_ops);
if (!ltn)
return NULL;
return container_of(ltn, struct mod_tree_node, node)->mod;
}
#else /* MODULES_TREE_LOOKUP */
static unsigned long module_addr_min = -1UL, module_addr_max = 0;
static void mod_tree_insert(struct module *mod) { }
static void mod_tree_remove_init(struct module *mod) { }
static void mod_tree_remove(struct module *mod) { }
static struct module *mod_find(unsigned long addr)
{
struct module *mod;
list_for_each_entry_rcu(mod, &modules, list) {
if (within_module(addr, mod))
return mod;
}
return NULL;
}
#endif /* MODULES_TREE_LOOKUP */
/*
* Bounds of module text, for speeding up __module_address.
* Protected by module_mutex.
*/
static void __mod_update_bounds(void *base, unsigned int size)
{
unsigned long min = (unsigned long)base;
unsigned long max = min + size;
if (min < module_addr_min)
module_addr_min = min;
if (max > module_addr_max)
module_addr_max = max;
}
static void mod_update_bounds(struct module *mod)
{
__mod_update_bounds(mod->module_core, mod->core_size);
if (mod->init_size)
__mod_update_bounds(mod->module_init, mod->init_size);
}
#ifdef CONFIG_KGDB_KDB
struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
#endif /* CONFIG_KGDB_KDB */
#ifdef CONFIG_MODULE_SIG
#ifdef CONFIG_MODULE_SIG_FORCE
static bool sig_enforce = true;
#else
static bool sig_enforce = false;
static int param_set_bool_enable_only(const char *val,
const struct kernel_param *kp)
static void module_assert_mutex(void)
{
int err;
bool test;
struct kernel_param dummy_kp = *kp;
dummy_kp.arg = &test;
err = param_set_bool(val, &dummy_kp);
if (err)
return err;
/* Don't let them unset it once it's set! */
if (!test && sig_enforce)
return -EROFS;
if (test)
sig_enforce = true;
return 0;
lockdep_assert_held(&module_mutex);
}
static const struct kernel_param_ops param_ops_bool_enable_only = {
.flags = KERNEL_PARAM_OPS_FL_NOARG,
.set = param_set_bool_enable_only,
.get = param_get_bool,
};
#define param_check_bool_enable_only param_check_bool
static void module_assert_mutex_or_preempt(void)
{
#ifdef CONFIG_LOCKDEP
if (unlikely(!debug_locks))
return;
WARN_ON(!rcu_read_lock_sched_held() &&
!lockdep_is_held(&module_mutex));
#endif
}
static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE);
#ifndef CONFIG_MODULE_SIG_FORCE
module_param(sig_enforce, bool_enable_only, 0644);
#endif /* !CONFIG_MODULE_SIG_FORCE */
#endif /* CONFIG_MODULE_SIG */
/* Block module loading/unloading? */
int modules_disabled = 0;
@@ -153,10 +306,6 @@ static DECLARE_WAIT_QUEUE_HEAD(module_wq);
static BLOCKING_NOTIFIER_HEAD(module_notify_list);
/* Bounds of module allocation, for speeding __module_address.
* Protected by module_mutex. */
static unsigned long module_addr_min = -1UL, module_addr_max = 0;
int register_module_notifier(struct notifier_block *nb)
{
return blocking_notifier_chain_register(&module_notify_list, nb);
@@ -318,6 +467,8 @@ bool each_symbol_section(bool (*fn)(const struct symsearch *arr,
#endif
};
module_assert_mutex_or_preempt();
if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data))
return true;
@@ -457,6 +608,8 @@ static struct module *find_module_all(const char *name, size_t len,
{
struct module *mod;
module_assert_mutex();
list_for_each_entry(mod, &modules, list) {
if (!even_unformed && mod->state == MODULE_STATE_UNFORMED)
continue;
@@ -1169,11 +1322,17 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs,
{
const unsigned long *crc;
/* Since this should be found in kernel (which can't be removed),
* no locking is necessary. */
/*
* Since this should be found in kernel (which can't be removed), no
* locking is necessary -- use preempt_disable() to placate lockdep.
*/
preempt_disable();
if (!find_symbol(VMLINUX_SYMBOL_STR(module_layout), NULL,
&crc, true, false))
&crc, true, false)) {
preempt_enable();
BUG();
}
preempt_enable();
return check_version(sechdrs, versindex,
VMLINUX_SYMBOL_STR(module_layout), mod, crc,
NULL);
@@ -1661,6 +1820,10 @@ static void mod_sysfs_fini(struct module *mod)
mod_kobject_put(mod);
}
static void init_param_lock(struct module *mod)
{
mutex_init(&mod->param_lock);
}
#else /* !CONFIG_SYSFS */
static int mod_sysfs_setup(struct module *mod,
@@ -1683,6 +1846,9 @@ static void del_usage_links(struct module *mod)
{
}
static void init_param_lock(struct module *mod)
{
}
#endif /* CONFIG_SYSFS */
static void mod_sysfs_teardown(struct module *mod)
@@ -1852,10 +2018,11 @@ static void free_module(struct module *mod)
mutex_lock(&module_mutex);
/* Unlink carefully: kallsyms could be walking list. */
list_del_rcu(&mod->list);
mod_tree_remove(mod);
/* Remove this module from bug list, this uses list_del_rcu */
module_bug_cleanup(mod);
/* Wait for RCU synchronizing before releasing mod->list and buglist. */
synchronize_rcu();
/* Wait for RCU-sched synchronizing before releasing mod->list and buglist. */
synchronize_sched();
mutex_unlock(&module_mutex);
/* This may be NULL, but that's OK */
@@ -2384,22 +2551,6 @@ void * __weak module_alloc(unsigned long size)
return vmalloc_exec(size);
}
static void *module_alloc_update_bounds(unsigned long size)
{
void *ret = module_alloc(size);
if (ret) {
mutex_lock(&module_mutex);
/* Update module bounds. */
if ((unsigned long)ret < module_addr_min)
module_addr_min = (unsigned long)ret;
if ((unsigned long)ret + size > module_addr_max)
module_addr_max = (unsigned long)ret + size;
mutex_unlock(&module_mutex);
}
return ret;
}
#ifdef CONFIG_DEBUG_KMEMLEAK
static void kmemleak_load_module(const struct module *mod,
const struct load_info *info)
@@ -2805,7 +2956,7 @@ static int move_module(struct module *mod, struct load_info *info)
void *ptr;
/* Do the allocs. */
ptr = module_alloc_update_bounds(mod->core_size);
ptr = module_alloc(mod->core_size);
/*
* The pointer to this block is stored in the module structure
* which is inside the block. Just mark it as not being a
@@ -2819,7 +2970,7 @@ static int move_module(struct module *mod, struct load_info *info)
mod->module_core = ptr;
if (mod->init_size) {
ptr = module_alloc_update_bounds(mod->init_size);
ptr = module_alloc(mod->init_size);
/*
* The pointer to this block is stored in the module structure
* which is inside the block. This block doesn't need to be
@@ -3119,6 +3270,7 @@ static noinline int do_init_module(struct module *mod)
mod->symtab = mod->core_symtab;
mod->strtab = mod->core_strtab;
#endif
mod_tree_remove_init(mod);
unset_module_init_ro_nx(mod);
module_arch_freeing_init(mod);
mod->module_init = NULL;
@@ -3127,11 +3279,11 @@ static noinline int do_init_module(struct module *mod)
mod->init_text_size = 0;
/*
* We want to free module_init, but be aware that kallsyms may be
* walking this with preempt disabled. In all the failure paths,
* we call synchronize_rcu/synchronize_sched, but we don't want
* to slow down the success path, so use actual RCU here.
* walking this with preempt disabled. In all the failure paths, we
* call synchronize_sched(), but we don't want to slow down the success
* path, so use actual RCU here.
*/
call_rcu(&freeinit->rcu, do_free_init);
call_rcu_sched(&freeinit->rcu, do_free_init);
mutex_unlock(&module_mutex);
wake_up_all(&module_wq);
@@ -3188,7 +3340,9 @@ again:
err = -EEXIST;
goto out;
}
mod_update_bounds(mod);
list_add_rcu(&mod->list, &modules);
mod_tree_insert(mod);
err = 0;
out:
@@ -3304,6 +3458,8 @@ static int load_module(struct load_info *info, const char __user *uargs,
if (err)
goto unlink_mod;
init_param_lock(mod);
/* Now we've got everything in the final locations, we can
* find optional sections. */
err = find_module_sections(mod, info);
@@ -3402,8 +3558,8 @@ static int load_module(struct load_info *info, const char __user *uargs,
/* Unlink carefully: kallsyms could be walking list. */
list_del_rcu(&mod->list);
wake_up_all(&module_wq);
/* Wait for RCU synchronizing before releasing mod->list. */
synchronize_rcu();
/* Wait for RCU-sched synchronizing before releasing mod->list. */
synchronize_sched();
mutex_unlock(&module_mutex);
free_module:
/* Free lock-classes; relies on the preceding sync_rcu() */
@@ -3527,19 +3683,15 @@ const char *module_address_lookup(unsigned long addr,
char **modname,
char *namebuf)
{
struct module *mod;
const char *ret = NULL;
struct module *mod;
preempt_disable();
list_for_each_entry_rcu(mod, &modules, list) {
if (mod->state == MODULE_STATE_UNFORMED)
continue;
if (within_module(addr, mod)) {
if (modname)
*modname = mod->name;
ret = get_ksymbol(mod, addr, size, offset);
break;
}
mod = __module_address(addr);
if (mod) {
if (modname)
*modname = mod->name;
ret = get_ksymbol(mod, addr, size, offset);
}
/* Make a copy in here where it's safe */
if (ret) {
@@ -3547,6 +3699,7 @@ const char *module_address_lookup(unsigned long addr,
ret = namebuf;
}
preempt_enable();
return ret;
}
@@ -3670,6 +3823,8 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
unsigned int i;
int ret;
module_assert_mutex();
list_for_each_entry(mod, &modules, list) {
if (mod->state == MODULE_STATE_UNFORMED)
continue;
@@ -3844,13 +3999,15 @@ struct module *__module_address(unsigned long addr)
if (addr < module_addr_min || addr > module_addr_max)
return NULL;
list_for_each_entry_rcu(mod, &modules, list) {
module_assert_mutex_or_preempt();
mod = mod_find(addr);
if (mod) {
BUG_ON(!within_module(addr, mod));
if (mod->state == MODULE_STATE_UNFORMED)
continue;
if (within_module(addr, mod))
return mod;
mod = NULL;
}
return NULL;
return mod;
}
EXPORT_SYMBOL_GPL(__module_address);