Merge branch 'akpm' (patches from Andrew)

Merge misc updates from Andrew Morton: "Bite-sized chunks this time, to avoid the MTA ratelimiting woes. - fs/notify updates - ocfs2 - some of MM" That laconic "some MM" is mainly the removal of remap_file_pages(), which is a big simplification of the VM, and which gets rid of a *lot* of random cruft and special cases because we no longer support the non-linear mappings that it used. From a user interface perspective, nothing has changed, because the remap_file_pages() syscall still exists, it's just done by emulating the old behavior by creating a lot of individual small mappings instead of one non-linear one. The emulation is slower than the old "native" non-linear mappings, but nobody really uses or cares about remap_file_pages(), and simplifying the VM is a big advantage. * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (78 commits) memcg: zap memcg_slab_caches and memcg_slab_mutex memcg: zap memcg_name argument of memcg_create_kmem_cache memcg: zap __memcg_{charge,uncharge}_slab mm/page_alloc.c: place zone_id check before VM_BUG_ON_PAGE check mm: hugetlb: fix type of hugetlb_treat_as_movable variable mm, hugetlb: remove unnecessary lower bound on sysctl handlers"? mm: memory: merge shared-writable dirtying branches in do_wp_page() mm: memory: remove ->vm_file check on shared writable vmas xtensa: drop _PAGE_FILE and pte_file()-related helpers x86: drop _PAGE_FILE and pte_file()-related helpers unicore32: drop pte_file()-related helpers um: drop _PAGE_FILE and pte_file()-related helpers tile: drop pte_file()-related helpers sparc: drop pte_file()-related helpers sh: drop _PAGE_FILE and pte_file()-related helpers score: drop _PAGE_FILE and pte_file()-related helpers s390: drop pte_file()-related helpers parisc: drop _PAGE_FILE and pte_file()-related helpers openrisc: drop _PAGE_FILE and pte_file()-related helpers nios2: drop _PAGE_FILE and pte_file()-related helpers ...
2015-02-10 16:45:56 -08:00
parent b2718bffb4 d5b3cf7139
commit 992de5a8ec
142 changed files with 641 additions and 2182 deletions
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -138,7 +138,6 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_ACCOUNT	0x00100000	/* Is a VM accounted object */
 #define VM_NORESERVE	0x00200000	/* should the VM suppress accounting */
 #define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
-#define VM_NONLINEAR	0x00800000	/* Is non-linear (remap_file_pages) */
 #define VM_ARCH_1	0x01000000	/* Architecture-specific flag */
 #define VM_ARCH_2	0x02000000
 #define VM_DONTDUMP	0x04000000	/* Do not include in the core dump */
@@ -206,21 +205,19 @@ extern unsigned int kobjsize(const void *objp);
 extern pgprot_t protection_map[16];

 #define FAULT_FLAG_WRITE	0x01	/* Fault was a write access */
-#define FAULT_FLAG_NONLINEAR	0x02	/* Fault was via a nonlinear mapping */
-#define FAULT_FLAG_MKWRITE	0x04	/* Fault was mkwrite of existing pte */
-#define FAULT_FLAG_ALLOW_RETRY	0x08	/* Retry fault if blocking */
-#define FAULT_FLAG_RETRY_NOWAIT	0x10	/* Don't drop mmap_sem and wait when retrying */
-#define FAULT_FLAG_KILLABLE	0x20	/* The fault task is in SIGKILL killable region */
-#define FAULT_FLAG_TRIED	0x40	/* second try */
-#define FAULT_FLAG_USER		0x80	/* The fault originated in userspace */
+#define FAULT_FLAG_MKWRITE	0x02	/* Fault was mkwrite of existing pte */
+#define FAULT_FLAG_ALLOW_RETRY	0x04	/* Retry fault if blocking */
+#define FAULT_FLAG_RETRY_NOWAIT	0x08	/* Don't drop mmap_sem and wait when retrying */
+#define FAULT_FLAG_KILLABLE	0x10	/* The fault task is in SIGKILL killable region */
+#define FAULT_FLAG_TRIED	0x20	/* Second try */
+#define FAULT_FLAG_USER		0x40	/* The fault originated in userspace */

 /*
 * vm_fault is filled by the the pagefault handler and passed to the vma's
 * ->fault function. The vma's ->fault is responsible for returning a bitmask
 * of VM_FAULT_xxx flags that give details about how the fault was handled.
 *
- * pgoff should be used in favour of virtual_address, if possible. If pgoff
- * is used, one may implement ->remap_pages to get nonlinear mapping support.
+ * pgoff should be used in favour of virtual_address, if possible.
 */
 struct vm_fault {
 	unsigned int flags;		/* FAULT_FLAG_xxx flags */
@@ -287,10 +284,6 @@ struct vm_operations_struct {
 	struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
 					unsigned long addr);
 #endif
-	/* called by sys_remap_file_pages() to populate non-linear mapping */
-	int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr,
-			   unsigned long size, pgoff_t pgoff);
-
 	/*
 	 * Called by vm_normal_page() for special PTEs to find the
 	 * page for @addr.  This is useful if the default behavior
@@ -454,6 +447,12 @@ static inline struct page *compound_head_by_tail(struct page *tail)
 	return tail;
 }

+/*
+ * Since either compound page could be dismantled asynchronously in THP
+ * or we access asynchronously arbitrary positioned struct page, there
+ * would be tail flag race. To handle this race, we should call
+ * smp_rmb() before checking tail flag. compound_head_by_tail() did it.
+ */
 static inline struct page *compound_head(struct page *page)
 {
 	if (unlikely(PageTail(page)))
@@ -461,6 +460,18 @@ static inline struct page *compound_head(struct page *page)
 	return page;
 }

+/*
+ * If we access compound page synchronously such as access to
+ * allocated page, there is no need to handle tail flag race, so we can
+ * check tail flag directly without any synchronization primitive.
+ */
+static inline struct page *compound_head_fast(struct page *page)
+{
+	if (unlikely(PageTail(page)))
+		return page->first_page;
+	return page;
+}
+
 /*
 * The atomic page->_mapcount, starts from -1: so that transitions
 * both from it and to it can be tracked, using atomic_inc_and_test
@@ -539,7 +550,14 @@ static inline void get_page(struct page *page)
 static inline struct page *virt_to_head_page(const void *x)
 {
 	struct page *page = virt_to_page(x);
-	return compound_head(page);
+
+	/*
+	 * We don't need to worry about synchronization of tail flag
+	 * when we call virt_to_head_page() since it is only called for
+	 * already allocated page and this page won't be freed until
+	 * this virt_to_head_page() is finished. So use _fast variant.
+	 */
+	return compound_head_fast(page);
 }

 /*
@@ -1129,7 +1147,6 @@ extern void user_shm_unlock(size_t, struct user_struct *);
 * Parameter block passed down to zap_pte_range in exceptional cases.
 */
 struct zap_details {
-	struct vm_area_struct *nonlinear_vma;	/* Check page->index if set */
 	struct address_space *check_mapping;	/* Check page->mapping if set */
 	pgoff_t	first_index;			/* Lowest page->index to unmap */
 	pgoff_t last_index;			/* Highest page->index to unmap */
@@ -1785,12 +1802,6 @@ struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node,
 	for (vma = vma_interval_tree_iter_first(root, start, last);	\
 	     vma; vma = vma_interval_tree_iter_next(vma, start, last))

-static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
-					struct list_head *list)
-{
-	list_add_tail(&vma->shared.nonlinear, list);
-}
-
 void anon_vma_interval_tree_insert(struct anon_vma_chain *node,
 				   struct rb_root *root);
 void anon_vma_interval_tree_remove(struct anon_vma_chain *node,