[PATCH] mm: unmap_vmas with inner ptlock

Remove the page_table_lock from around the calls to unmap_vmas, and replace
the pte_offset_map in zap_pte_range by pte_offset_map_lock: all callers are
now safe to descend without page_table_lock.

Don't attempt fancy locking for hugepages: just take page_table_lock in
unmap_hugepage_range.  That makes zap_hugepage_range, and the hugetlb test in
zap_page_range, redundant, since unmap_vmas calls unmap_hugepage_range anyway.
Nor does unmap_vmas have much use for its mm arg now, so drop it.

The tlb_start_vma and tlb_end_vma in unmap_page_range are now called without
page_table_lock: if they're implemented at all, they typically come down to
flush_cache_range (usually done outside page_table_lock) and flush_tlb_range
(which we already audited for the mprotect case).

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
Hugh Dickins
2005-10-29 18:16:30 -07:00
committed by Linus Torvalds
parent 8f4f8c164c
commit 508034a32b
6 changed files with 21 additions and 54 deletions

View File

@@ -314,6 +314,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
BUG_ON(start & ~HPAGE_MASK);
BUG_ON(end & ~HPAGE_MASK);
spin_lock(&mm->page_table_lock);
/* Update high watermark before we lower rss */
update_hiwater_rss(mm);
@@ -333,17 +335,9 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
put_page(page);
add_mm_counter(mm, file_rss, (int) -(HPAGE_SIZE / PAGE_SIZE));
}
flush_tlb_range(vma, start, end);
}
void zap_hugepage_range(struct vm_area_struct *vma,
unsigned long start, unsigned long length)
{
struct mm_struct *mm = vma->vm_mm;
spin_lock(&mm->page_table_lock);
unmap_hugepage_range(vma, start, start + length);
spin_unlock(&mm->page_table_lock);
flush_tlb_range(vma, start, end);
}
int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)

View File

@@ -551,10 +551,11 @@ static void zap_pte_range(struct mmu_gather *tlb,
{
struct mm_struct *mm = tlb->mm;
pte_t *pte;
spinlock_t *ptl;
int file_rss = 0;
int anon_rss = 0;
pte = pte_offset_map(pmd, addr);
pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
do {
pte_t ptent = *pte;
if (pte_none(ptent))
@@ -621,7 +622,7 @@ static void zap_pte_range(struct mmu_gather *tlb,
} while (pte++, addr += PAGE_SIZE, addr != end);
add_mm_rss(mm, file_rss, anon_rss);
pte_unmap(pte - 1);
pte_unmap_unlock(pte - 1, ptl);
}
static inline void zap_pmd_range(struct mmu_gather *tlb,
@@ -690,7 +691,6 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
/**
* unmap_vmas - unmap a range of memory covered by a list of vma's
* @tlbp: address of the caller's struct mmu_gather
* @mm: the controlling mm_struct
* @vma: the starting vma
* @start_addr: virtual address at which to start unmapping
* @end_addr: virtual address at which to end unmapping
@@ -699,10 +699,10 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
*
* Returns the end address of the unmapping (restart addr if interrupted).
*
* Unmap all pages in the vma list. Called under page_table_lock.
* Unmap all pages in the vma list.
*
* We aim to not hold page_table_lock for too long (for scheduling latency
* reasons). So zap pages in ZAP_BLOCK_SIZE bytecounts. This means we need to
* We aim to not hold locks for too long (for scheduling latency reasons).
* So zap pages in ZAP_BLOCK_SIZE bytecounts. This means we need to
* return the ending mmu_gather to the caller.
*
* Only addresses between `start' and `end' will be unmapped.
@@ -714,7 +714,7 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
* ensure that any thus-far unmapped pages are flushed before unmap_vmas()
* drops the lock and schedules.
*/
unsigned long unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
unsigned long unmap_vmas(struct mmu_gather **tlbp,
struct vm_area_struct *vma, unsigned long start_addr,
unsigned long end_addr, unsigned long *nr_accounted,
struct zap_details *details)
@@ -764,19 +764,15 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
tlb_finish_mmu(*tlbp, tlb_start, start);
if (need_resched() ||
need_lockbreak(&mm->page_table_lock) ||
(i_mmap_lock && need_lockbreak(i_mmap_lock))) {
if (i_mmap_lock) {
/* must reset count of rss freed */
*tlbp = tlb_gather_mmu(mm, fullmm);
*tlbp = NULL;
goto out;
}
spin_unlock(&mm->page_table_lock);
cond_resched();
spin_lock(&mm->page_table_lock);
}
*tlbp = tlb_gather_mmu(mm, fullmm);
*tlbp = tlb_gather_mmu(vma->vm_mm, fullmm);
tlb_start_valid = 0;
zap_bytes = ZAP_BLOCK_SIZE;
}
@@ -800,18 +796,12 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
unsigned long end = address + size;
unsigned long nr_accounted = 0;
if (is_vm_hugetlb_page(vma)) {
zap_hugepage_range(vma, address, size);
return end;
}
lru_add_drain();
tlb = tlb_gather_mmu(mm, 0);
update_hiwater_rss(mm);
spin_lock(&mm->page_table_lock);
end = unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
spin_unlock(&mm->page_table_lock);
tlb_finish_mmu(tlb, address, end);
end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
if (tlb)
tlb_finish_mmu(tlb, address, end);
return end;
}
@@ -1434,13 +1424,6 @@ again:
restart_addr = zap_page_range(vma, start_addr,
end_addr - start_addr, details);
/*
* We cannot rely on the break test in unmap_vmas:
* on the one hand, we don't want to restart our loop
* just because that broke out for the page_table_lock;
* on the other hand, it does no test when vma is small.
*/
need_break = need_resched() ||
need_lockbreak(details->i_mmap_lock);

View File

@@ -1673,9 +1673,7 @@ static void unmap_region(struct mm_struct *mm,
lru_add_drain();
tlb = tlb_gather_mmu(mm, 0);
update_hiwater_rss(mm);
spin_lock(&mm->page_table_lock);
unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted, NULL);
spin_unlock(&mm->page_table_lock);
unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
vm_unacct_memory(nr_accounted);
free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
next? next->vm_start: 0);
@@ -1958,9 +1956,7 @@ void exit_mmap(struct mm_struct *mm)
tlb = tlb_gather_mmu(mm, 1);
/* Don't update_hiwater_rss(mm) here, do_exit already did */
/* Use -1 here to ensure all VMAs in the mm are unmapped */
spin_lock(&mm->page_table_lock);
end = unmap_vmas(&tlb, mm, vma, 0, -1, &nr_accounted, NULL);
spin_unlock(&mm->page_table_lock);
end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
vm_unacct_memory(nr_accounted);
free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
tlb_finish_mmu(tlb, 0, end);