diff options
author | Qi Zheng <zhengqi.arch@bytedance.com> | 2024-09-26 14:46:18 +0800 |
---|---|---|
committer | Andrew Morton <akpm@linux-foundation.org> | 2024-11-05 16:56:27 -0800 |
commit | fc9c45b71f43cafcc0435dd4c7a2d3b99955a0fa (patch) | |
tree | be777477db342907a5f60c412a141641fc8caa0f | |
parent | c85507857bb8904f8631b3a89b19aa73b1f77e48 (diff) | |
download | linux-fc9c45b71f43cafcc0435dd4c7a2d3b99955a0fa.tar.gz linux-fc9c45b71f43cafcc0435dd4c7a2d3b99955a0fa.tar.bz2 linux-fc9c45b71f43cafcc0435dd4c7a2d3b99955a0fa.zip |
arm: adjust_pte() use pte_offset_map_rw_nolock()
In do_adjust_pte(), we may modify the pte entry. The corresponding pmd
entry may have been modified concurrently. Therefore, in order to ensure
the stability if pmd entry, use pte_offset_map_rw_nolock() to replace
pte_offset_map_nolock(), and do pmd_same() check after holding the PTL.
All callers of update_mmu_cache_range() hold the vmf->ptl, so we can
determined whether split PTE locks is being used by doing the following,
just as we do elsewhere in the kernel.
ptl != vmf->ptl
And then we can delete the do_pte_lock() and do_pte_unlock().
Link: https://lkml.kernel.org/r/0eaf6b69aeb2fe35092a633fed12537efe645303.1727332572.git.zhengqi.arch@bytedance.com
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Muchun Song <muchun.song@linux.dev>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-rw-r--r-- | arch/arm/mm/fault-armv.c | 53 |
1 files changed, 22 insertions, 31 deletions
diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c index 831793cd6ff9..2bec87c3327d 100644 --- a/arch/arm/mm/fault-armv.c +++ b/arch/arm/mm/fault-armv.c @@ -61,32 +61,8 @@ static int do_adjust_pte(struct vm_area_struct *vma, unsigned long address, return ret; } -#if defined(CONFIG_SPLIT_PTE_PTLOCKS) -/* - * If we are using split PTE locks, then we need to take the page - * lock here. Otherwise we are using shared mm->page_table_lock - * which is already locked, thus cannot take it. - */ -static inline void do_pte_lock(spinlock_t *ptl) -{ - /* - * Use nested version here to indicate that we are already - * holding one similar spinlock. - */ - spin_lock_nested(ptl, SINGLE_DEPTH_NESTING); -} - -static inline void do_pte_unlock(spinlock_t *ptl) -{ - spin_unlock(ptl); -} -#else /* !defined(CONFIG_SPLIT_PTE_PTLOCKS) */ -static inline void do_pte_lock(spinlock_t *ptl) {} -static inline void do_pte_unlock(spinlock_t *ptl) {} -#endif /* defined(CONFIG_SPLIT_PTE_PTLOCKS) */ - static int adjust_pte(struct vm_area_struct *vma, unsigned long address, - unsigned long pfn) + unsigned long pfn, struct vm_fault *vmf) { spinlock_t *ptl; pgd_t *pgd; @@ -94,6 +70,7 @@ static int adjust_pte(struct vm_area_struct *vma, unsigned long address, pud_t *pud; pmd_t *pmd; pte_t *pte; + pmd_t pmdval; int ret; pgd = pgd_offset(vma->vm_mm, address); @@ -112,20 +89,33 @@ static int adjust_pte(struct vm_area_struct *vma, unsigned long address, if (pmd_none_or_clear_bad(pmd)) return 0; +again: /* * This is called while another page table is mapped, so we * must use the nested version. This also means we need to * open-code the spin-locking. */ - pte = pte_offset_map_nolock(vma->vm_mm, pmd, address, &ptl); + pte = pte_offset_map_rw_nolock(vma->vm_mm, pmd, address, &pmdval, &ptl); if (!pte) return 0; - do_pte_lock(ptl); + /* + * If we are using split PTE locks, then we need to take the page + * lock here. Otherwise we are using shared mm->page_table_lock + * which is already locked, thus cannot take it. + */ + if (ptl != vmf->ptl) { + spin_lock_nested(ptl, SINGLE_DEPTH_NESTING); + if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pmd)))) { + pte_unmap_unlock(pte, ptl); + goto again; + } + } ret = do_adjust_pte(vma, address, pfn, pte); - do_pte_unlock(ptl); + if (ptl != vmf->ptl) + spin_unlock(ptl); pte_unmap(pte); return ret; @@ -133,7 +123,8 @@ static int adjust_pte(struct vm_area_struct *vma, unsigned long address, static void make_coherent(struct address_space *mapping, struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep, unsigned long pfn) + unsigned long addr, pte_t *ptep, unsigned long pfn, + struct vm_fault *vmf) { struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *mpnt; @@ -160,7 +151,7 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma, if (!(mpnt->vm_flags & VM_MAYSHARE)) continue; offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT; - aliases += adjust_pte(mpnt, mpnt->vm_start + offset, pfn); + aliases += adjust_pte(mpnt, mpnt->vm_start + offset, pfn, vmf); } flush_dcache_mmap_unlock(mapping); if (aliases) @@ -203,7 +194,7 @@ void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, __flush_dcache_folio(mapping, folio); if (mapping) { if (cache_is_vivt()) - make_coherent(mapping, vma, addr, ptep, pfn); + make_coherent(mapping, vma, addr, ptep, pfn, vmf); else if (vma->vm_flags & VM_EXEC) __flush_icache_all(); } |