[PATCH] proc: Rewrite the proc dentry flush on exit optimization
To keep the dcache from filling up with dead /proc entries we flush them on process exit. However, over the years that code has gotten hairy, with a dentry_pointer and a lock in task_struct, and has been misdocumented as a correctness feature. I have rewritten this code to look and see if we have a corresponding entry in the dcache and, if so, flush it on process exit. This removes the extra fields in the task_struct and allows me to trivially handle the case of a /proc/<tgid>/task/<pid> entry as well as the current /proc/<pid> entries. Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
		 Eric W. Biederman
					Eric W. Biederman
				
			
				
					committed by
					
						 Linus Torvalds
						Linus Torvalds
					
				
			
			
				
	
			
			
			 Linus Torvalds
						Linus Torvalds
					
				
			
						parent
						
							662795deb8
						
					
				
				
					commit
					48e6484d49
				
			
							
								
								
									
										10
									
								
								fs/exec.c
									
									
									
									
									
								
							
							
						
						
									
										10
									
								
								fs/exec.c
									
									
									
									
									
								
							| @@ -666,8 +666,6 @@ static int de_thread(struct task_struct *tsk) | |||||||
| 	 * and to assume its PID: | 	 * and to assume its PID: | ||||||
| 	 */ | 	 */ | ||||||
| 	if (!thread_group_leader(current)) { | 	if (!thread_group_leader(current)) { | ||||||
| 		struct dentry *proc_dentry1, *proc_dentry2; |  | ||||||
|  |  | ||||||
| 		/* | 		/* | ||||||
| 		 * Wait for the thread group leader to be a zombie. | 		 * Wait for the thread group leader to be a zombie. | ||||||
| 		 * It should already be zombie at this point, most | 		 * It should already be zombie at this point, most | ||||||
| @@ -689,10 +687,6 @@ static int de_thread(struct task_struct *tsk) | |||||||
| 		 */ | 		 */ | ||||||
| 		current->start_time = leader->start_time; | 		current->start_time = leader->start_time; | ||||||
|  |  | ||||||
| 		spin_lock(&leader->proc_lock); |  | ||||||
| 		spin_lock(&current->proc_lock); |  | ||||||
| 		proc_dentry1 = proc_pid_unhash(current); |  | ||||||
| 		proc_dentry2 = proc_pid_unhash(leader); |  | ||||||
| 		write_lock_irq(&tasklist_lock); | 		write_lock_irq(&tasklist_lock); | ||||||
|  |  | ||||||
| 		BUG_ON(leader->tgid != current->tgid); | 		BUG_ON(leader->tgid != current->tgid); | ||||||
| @@ -729,10 +723,6 @@ static int de_thread(struct task_struct *tsk) | |||||||
| 		leader->exit_state = EXIT_DEAD; | 		leader->exit_state = EXIT_DEAD; | ||||||
|  |  | ||||||
| 		write_unlock_irq(&tasklist_lock); | 		write_unlock_irq(&tasklist_lock); | ||||||
| 		spin_unlock(&leader->proc_lock); |  | ||||||
| 		spin_unlock(&current->proc_lock); |  | ||||||
| 		proc_pid_flush(proc_dentry1); |  | ||||||
| 		proc_pid_flush(proc_dentry2); |  | ||||||
|         } |         } | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
|   | |||||||
							
								
								
									
										134
									
								
								fs/proc/base.c
									
									
									
									
									
								
							
							
						
						
									
										134
									
								
								fs/proc/base.c
									
									
									
									
									
								
							| @@ -1352,16 +1352,6 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) | |||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
|  |  | ||||||
| static void pid_base_iput(struct dentry *dentry, struct inode *inode) |  | ||||||
| { |  | ||||||
| 	struct task_struct *task = proc_task(inode); |  | ||||||
| 	spin_lock(&task->proc_lock); |  | ||||||
| 	if (task->proc_dentry == dentry) |  | ||||||
| 		task->proc_dentry = NULL; |  | ||||||
| 	spin_unlock(&task->proc_lock); |  | ||||||
| 	iput(inode); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static int pid_delete_dentry(struct dentry * dentry) | static int pid_delete_dentry(struct dentry * dentry) | ||||||
| { | { | ||||||
| 	/* Is the task we represent dead? | 	/* Is the task we represent dead? | ||||||
| @@ -1383,13 +1373,6 @@ static struct dentry_operations pid_dentry_operations = | |||||||
| 	.d_delete	= pid_delete_dentry, | 	.d_delete	= pid_delete_dentry, | ||||||
| }; | }; | ||||||
|  |  | ||||||
| static struct dentry_operations pid_base_dentry_operations = |  | ||||||
| { |  | ||||||
| 	.d_revalidate	= pid_revalidate, |  | ||||||
| 	.d_iput		= pid_base_iput, |  | ||||||
| 	.d_delete	= pid_delete_dentry, |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| /* Lookups */ | /* Lookups */ | ||||||
|  |  | ||||||
| static unsigned name_to_int(struct dentry *dentry) | static unsigned name_to_int(struct dentry *dentry) | ||||||
| @@ -1859,57 +1842,70 @@ static struct inode_operations proc_self_inode_operations = { | |||||||
| }; | }; | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * proc_pid_unhash -  Unhash /proc/@pid entry from the dcache. |  * proc_flush_task -  Remove dcache entries for @task from the /proc dcache. | ||||||
|  * @p: task that should be flushed. |  | ||||||
|  * |  * | ||||||
|  * Drops the /proc/@pid dcache entry from the hash chains. |  * @task: task that should be flushed. | ||||||
|  * |  * | ||||||
|  * Dropping /proc/@pid entries and detach_pid must be synchroneous, |  * Looks in the dcache for | ||||||
|  * otherwise e.g. /proc/@pid/exe might point to the wrong executable, |  * /proc/@pid | ||||||
|  * if the pid value is immediately reused. This is enforced by |  * /proc/@tgid/task/@pid | ||||||
|  * - caller must acquire spin_lock(p->proc_lock) |  * if either directory is present flushes it and all of its children | ||||||
|  * - must be called before detach_pid() |  * from the dcache. | ||||||
|  * - proc_pid_lookup acquires proc_lock, and checks that |  * | ||||||
|  *   the target is not dead by looking at the attach count |  * It is safe and reasonable to cache /proc entries for a task until | ||||||
|  *   of PIDTYPE_PID. |  * that task exits.  After that they just clog up the dcache with | ||||||
|  |  * useless entries, possibly causing useful dcache entries to be | ||||||
|  |  * flushed instead.  This routine is provided to flush those useless | ||||||
|  |  * dcache entries at process exit time. | ||||||
|  |  * | ||||||
|  |  * NOTE: This routine is just an optimization so it does not guarantee | ||||||
|  |  *       that no dcache entries will exist at process exit time it | ||||||
|  |  *       just makes it very unlikely that any will persist. | ||||||
|  */ |  */ | ||||||
|  | void proc_flush_task(struct task_struct *task) | ||||||
| struct dentry *proc_pid_unhash(struct task_struct *p) |  | ||||||
| { | { | ||||||
| 	struct dentry *proc_dentry; | 	struct dentry *dentry, *leader, *dir; | ||||||
|  | 	char buf[30]; | ||||||
|  | 	struct qstr name; | ||||||
|  |  | ||||||
| 	proc_dentry = p->proc_dentry; | 	name.name = buf; | ||||||
| 	if (proc_dentry != NULL) { | 	name.len = snprintf(buf, sizeof(buf), "%d", task->pid); | ||||||
|  | 	dentry = d_hash_and_lookup(proc_mnt->mnt_root, &name); | ||||||
| 		spin_lock(&dcache_lock); | 	if (dentry) { | ||||||
| 		spin_lock(&proc_dentry->d_lock); | 		shrink_dcache_parent(dentry); | ||||||
| 		if (!d_unhashed(proc_dentry)) { | 		d_drop(dentry); | ||||||
| 			dget_locked(proc_dentry); | 		dput(dentry); | ||||||
| 			__d_drop(proc_dentry); |  | ||||||
| 			spin_unlock(&proc_dentry->d_lock); |  | ||||||
| 		} else { |  | ||||||
| 			spin_unlock(&proc_dentry->d_lock); |  | ||||||
| 			proc_dentry = NULL; |  | ||||||
| 		} |  | ||||||
| 		spin_unlock(&dcache_lock); |  | ||||||
| 	} |  | ||||||
| 	return proc_dentry; |  | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| /** | 	if (thread_group_leader(task)) | ||||||
|  * proc_pid_flush - recover memory used by stale /proc/@pid/x entries | 		goto out; | ||||||
|  * @proc_dentry: directoy to prune. |  | ||||||
|  * |  | ||||||
|  * Shrink the /proc directory that was used by the just killed thread. |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| void proc_pid_flush(struct dentry *proc_dentry) | 	name.name = buf; | ||||||
| { | 	name.len = snprintf(buf, sizeof(buf), "%d", task->tgid); | ||||||
| 	might_sleep(); | 	leader = d_hash_and_lookup(proc_mnt->mnt_root, &name); | ||||||
| 	if(proc_dentry != NULL) { | 	if (!leader) | ||||||
| 		shrink_dcache_parent(proc_dentry); | 		goto out; | ||||||
| 		dput(proc_dentry); |  | ||||||
|  | 	name.name = "task"; | ||||||
|  | 	name.len = strlen(name.name); | ||||||
|  | 	dir = d_hash_and_lookup(leader, &name); | ||||||
|  | 	if (!dir) | ||||||
|  | 		goto out_put_leader; | ||||||
|  |  | ||||||
|  | 	name.name = buf; | ||||||
|  | 	name.len = snprintf(buf, sizeof(buf), "%d", task->pid); | ||||||
|  | 	dentry = d_hash_and_lookup(dir, &name); | ||||||
|  | 	if (dentry) { | ||||||
|  | 		shrink_dcache_parent(dentry); | ||||||
|  | 		d_drop(dentry); | ||||||
|  | 		dput(dentry); | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	dput(dir); | ||||||
|  | out_put_leader: | ||||||
|  | 	dput(leader); | ||||||
|  | out: | ||||||
|  | 	return; | ||||||
| } | } | ||||||
|  |  | ||||||
| /* SMP-safe */ | /* SMP-safe */ | ||||||
| @@ -1919,7 +1915,6 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct | |||||||
| 	struct inode *inode; | 	struct inode *inode; | ||||||
| 	struct proc_inode *ei; | 	struct proc_inode *ei; | ||||||
| 	unsigned tgid; | 	unsigned tgid; | ||||||
| 	int died; |  | ||||||
|  |  | ||||||
| 	if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) { | 	if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) { | ||||||
| 		inode = new_inode(dir->i_sb); | 		inode = new_inode(dir->i_sb); | ||||||
| @@ -1965,23 +1960,16 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct | |||||||
| 	inode->i_nlink = 4; | 	inode->i_nlink = 4; | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| 	dentry->d_op = &pid_base_dentry_operations; | 	dentry->d_op = &pid_dentry_operations; | ||||||
|  |  | ||||||
| 	died = 0; |  | ||||||
| 	d_add(dentry, inode); | 	d_add(dentry, inode); | ||||||
| 	spin_lock(&task->proc_lock); |  | ||||||
| 	task->proc_dentry = dentry; |  | ||||||
| 	if (!pid_alive(task)) { | 	if (!pid_alive(task)) { | ||||||
| 		dentry = proc_pid_unhash(task); | 		d_drop(dentry); | ||||||
| 		died = 1; | 		shrink_dcache_parent(dentry); | ||||||
| 	} |  | ||||||
| 	spin_unlock(&task->proc_lock); |  | ||||||
|  |  | ||||||
| 	put_task_struct(task); |  | ||||||
| 	if (died) { |  | ||||||
| 		proc_pid_flush(dentry); |  | ||||||
| 		goto out; | 		goto out; | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	put_task_struct(task); | ||||||
| 	return NULL; | 	return NULL; | ||||||
| out: | out: | ||||||
| 	return ERR_PTR(-ENOENT); | 	return ERR_PTR(-ENOENT); | ||||||
| @@ -2024,7 +2012,7 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry | |||||||
| 	inode->i_nlink = 3; | 	inode->i_nlink = 3; | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| 	dentry->d_op = &pid_base_dentry_operations; | 	dentry->d_op = &pid_dentry_operations; | ||||||
|  |  | ||||||
| 	d_add(dentry, inode); | 	d_add(dentry, inode); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -119,7 +119,6 @@ extern struct group_info init_groups; | |||||||
| 		.signal = {{0}}},					\ | 		.signal = {{0}}},					\ | ||||||
| 	.blocked	= {{0}},					\ | 	.blocked	= {{0}},					\ | ||||||
| 	.alloc_lock	= SPIN_LOCK_UNLOCKED,				\ | 	.alloc_lock	= SPIN_LOCK_UNLOCKED,				\ | ||||||
| 	.proc_lock	= SPIN_LOCK_UNLOCKED,				\ |  | ||||||
| 	.journal_info	= NULL,						\ | 	.journal_info	= NULL,						\ | ||||||
| 	.cpu_timers	= INIT_CPU_TIMERS(tsk.cpu_timers),		\ | 	.cpu_timers	= INIT_CPU_TIMERS(tsk.cpu_timers),		\ | ||||||
| 	.fs_excl	= ATOMIC_INIT(0),				\ | 	.fs_excl	= ATOMIC_INIT(0),				\ | ||||||
|   | |||||||
| @@ -99,9 +99,8 @@ extern void proc_misc_init(void); | |||||||
|  |  | ||||||
| struct mm_struct; | struct mm_struct; | ||||||
|  |  | ||||||
|  | void proc_flush_task(struct task_struct *task); | ||||||
| struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); | ||||||
| struct dentry *proc_pid_unhash(struct task_struct *p); |  | ||||||
| void proc_pid_flush(struct dentry *proc_dentry); |  | ||||||
| int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); | ||||||
| unsigned long task_vsize(struct mm_struct *); | unsigned long task_vsize(struct mm_struct *); | ||||||
| int task_statm(struct mm_struct *, int *, int *, int *, int *); | int task_statm(struct mm_struct *, int *, int *, int *, int *); | ||||||
| @@ -211,8 +210,7 @@ static inline void proc_net_remove(const char *name) | |||||||
| #define proc_net_create(name, mode, info)	({ (void)(mode), NULL; }) | #define proc_net_create(name, mode, info)	({ (void)(mode), NULL; }) | ||||||
| static inline void proc_net_remove(const char *name) {} | static inline void proc_net_remove(const char *name) {} | ||||||
|  |  | ||||||
| static inline struct dentry *proc_pid_unhash(struct task_struct *p) { return NULL; } | static inline void proc_flush_task(struct task_struct *task) { } | ||||||
| static inline void proc_pid_flush(struct dentry *proc_dentry) { } |  | ||||||
|  |  | ||||||
| static inline struct proc_dir_entry *create_proc_entry(const char *name, | static inline struct proc_dir_entry *create_proc_entry(const char *name, | ||||||
| 	mode_t mode, struct proc_dir_entry *parent) { return NULL; } | 	mode_t mode, struct proc_dir_entry *parent) { return NULL; } | ||||||
|   | |||||||
| @@ -842,8 +842,6 @@ struct task_struct { | |||||||
|    	u32 self_exec_id; |    	u32 self_exec_id; | ||||||
| /* Protection of (de-)allocation: mm, files, fs, tty, keyrings */ | /* Protection of (de-)allocation: mm, files, fs, tty, keyrings */ | ||||||
| 	spinlock_t alloc_lock; | 	spinlock_t alloc_lock; | ||||||
| /* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */ |  | ||||||
| 	spinlock_t proc_lock; |  | ||||||
|  |  | ||||||
| #ifdef CONFIG_DEBUG_MUTEXES | #ifdef CONFIG_DEBUG_MUTEXES | ||||||
| 	/* mutex deadlock detection */ | 	/* mutex deadlock detection */ | ||||||
| @@ -856,7 +854,6 @@ struct task_struct { | |||||||
| /* VM state */ | /* VM state */ | ||||||
| 	struct reclaim_state *reclaim_state; | 	struct reclaim_state *reclaim_state; | ||||||
|  |  | ||||||
| 	struct dentry *proc_dentry; |  | ||||||
| 	struct backing_dev_info *backing_dev_info; | 	struct backing_dev_info *backing_dev_info; | ||||||
|  |  | ||||||
| 	struct io_context *io_context; | 	struct io_context *io_context; | ||||||
|   | |||||||
| @@ -137,12 +137,8 @@ void release_task(struct task_struct * p) | |||||||
| { | { | ||||||
| 	int zap_leader; | 	int zap_leader; | ||||||
| 	task_t *leader; | 	task_t *leader; | ||||||
| 	struct dentry *proc_dentry; |  | ||||||
|  |  | ||||||
| repeat: | repeat: | ||||||
| 	atomic_dec(&p->user->processes); | 	atomic_dec(&p->user->processes); | ||||||
| 	spin_lock(&p->proc_lock); |  | ||||||
| 	proc_dentry = proc_pid_unhash(p); |  | ||||||
| 	write_lock_irq(&tasklist_lock); | 	write_lock_irq(&tasklist_lock); | ||||||
| 	ptrace_unlink(p); | 	ptrace_unlink(p); | ||||||
| 	BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); | 	BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); | ||||||
| @@ -171,8 +167,7 @@ repeat: | |||||||
|  |  | ||||||
| 	sched_exit(p); | 	sched_exit(p); | ||||||
| 	write_unlock_irq(&tasklist_lock); | 	write_unlock_irq(&tasklist_lock); | ||||||
| 	spin_unlock(&p->proc_lock); | 	proc_flush_task(p); | ||||||
| 	proc_pid_flush(proc_dentry); |  | ||||||
| 	release_thread(p); | 	release_thread(p); | ||||||
| 	call_rcu(&p->rcu, delayed_put_task_struct); | 	call_rcu(&p->rcu, delayed_put_task_struct); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -993,13 +993,10 @@ static task_t *copy_process(unsigned long clone_flags, | |||||||
| 		if (put_user(p->pid, parent_tidptr)) | 		if (put_user(p->pid, parent_tidptr)) | ||||||
| 			goto bad_fork_cleanup; | 			goto bad_fork_cleanup; | ||||||
|  |  | ||||||
| 	p->proc_dentry = NULL; |  | ||||||
|  |  | ||||||
| 	INIT_LIST_HEAD(&p->children); | 	INIT_LIST_HEAD(&p->children); | ||||||
| 	INIT_LIST_HEAD(&p->sibling); | 	INIT_LIST_HEAD(&p->sibling); | ||||||
| 	p->vfork_done = NULL; | 	p->vfork_done = NULL; | ||||||
| 	spin_lock_init(&p->alloc_lock); | 	spin_lock_init(&p->alloc_lock); | ||||||
| 	spin_lock_init(&p->proc_lock); |  | ||||||
|  |  | ||||||
| 	clear_tsk_thread_flag(p, TIF_SIGPENDING); | 	clear_tsk_thread_flag(p, TIF_SIGPENDING); | ||||||
| 	init_sigpending(&p->pending); | 	init_sigpending(&p->pending); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user