diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-28 10:28:11 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-28 10:28:11 -0700 |
commit | 6e17c6de3ddf3073741d9c91a796ee696914d8a0 (patch) | |
tree | 2c425707f78642625dbe2c824c7fded2021e3dc7 /mm/huge_memory.c | |
parent | 6aeadf7896bff4ca230702daba8788455e6b866e (diff) | |
parent | acc72d59c7509540c27c49625cb4b5a8db1f1a84 (diff) | |
download | linux-6e17c6de3ddf3073741d9c91a796ee696914d8a0.tar.gz linux-6e17c6de3ddf3073741d9c91a796ee696914d8a0.tar.bz2 linux-6e17c6de3ddf3073741d9c91a796ee696914d8a0.zip |
Merge tag 'mm-stable-2023-06-24-19-15' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull mm updates from Andrew Morton:
- Yosry Ahmed brought back some cgroup v1 stats in OOM logs
- Yosry has also eliminated cgroup's atomic rstat flushing
- Nhat Pham adds the new cachestat() syscall. It provides userspace
with the ability to query pagecache status - a similar concept to
mincore() but more powerful and with improved usability
- Mel Gorman provides more optimizations for compaction, reducing the
prevalence of page rescanning
- Lorenzo Stoakes has done some maintenance work on the
get_user_pages() interface
- Liam Howlett continues with cleanups and maintenance work to the
maple tree code. Peng Zhang also does some work on maple tree
- Johannes Weiner has done some cleanup work on the compaction code
- David Hildenbrand has contributed additional selftests for
get_user_pages()
- Thomas Gleixner has contributed some maintenance and optimization
work for the vmalloc code
- Baolin Wang has provided some compaction cleanups
- SeongJae Park continues maintenance work on the DAMON code
- Huang Ying has done some maintenance on the swap code's usage of
device refcounting
- Christoph Hellwig has some cleanups for the filemap/directio code
- Ryan Roberts provides two patch series which yield some
rationalization of the kernel's access to pte entries - use the
provided APIs rather than open-coding accesses
- Lorenzo Stoakes has some fixes to the interaction between pagecache
and directio access to file mappings
- John Hubbard has a series of fixes to the MM selftesting code
- ZhangPeng continues the folio conversion campaign
- Hugh Dickins has been working on the pagetable handling code, mainly
with a view to reducing the load on the mmap_lock
- Catalin Marinas has reduced the arm64 kmalloc() minimum alignment
from 128 to 8
- Domenico Cerasuolo has improved the zswap reclaim mechanism by
reorganizing the LRU management
- Matthew Wilcox provides some fixups to make gfs2 work better with the
buffer_head code
- Vishal Moola also has done some folio conversion work
- Matthew Wilcox has removed the remnants of the pagevec code - their
functionality is migrated over to struct folio_batch
* tag 'mm-stable-2023-06-24-19-15' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (380 commits)
mm/hugetlb: remove hugetlb_set_page_subpool()
mm: nommu: correct the range of mmap_sem_read_lock in task_mem()
hugetlb: revert use of page_cache_next_miss()
Revert "page cache: fix page_cache_next/prev_miss off by one"
mm/vmscan: fix root proactive reclaim unthrottling unbalanced node
mm: memcg: rename and document global_reclaim()
mm: kill [add|del]_page_to_lru_list()
mm: compaction: convert to use a folio in isolate_migratepages_block()
mm: zswap: fix double invalidate with exclusive loads
mm: remove unnecessary pagevec includes
mm: remove references to pagevec
mm: rename invalidate_mapping_pagevec to mapping_try_invalidate
mm: remove struct pagevec
net: convert sunrpc from pagevec to folio_batch
i915: convert i915_gpu_error to use a folio_batch
pagevec: rename fbatch_count()
mm: remove check_move_unevictable_pages()
drm: convert drm_gem_put_pages() to use a folio_batch
i915: convert shmem_sg_free_table() to use a folio_batch
scatterlist: add sg_set_folio()
...
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r-- | mm/huge_memory.c | 56 |
1 files changed, 36 insertions, 20 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 624671aaa60d..eb3678360b97 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -583,7 +583,7 @@ void prep_transhuge_page(struct page *page) VM_BUG_ON_FOLIO(folio_order(folio) < 2, folio); INIT_LIST_HEAD(&folio->_deferred_list); - set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR); + folio_set_compound_dtor(folio, TRANSHUGE_PAGE_DTOR); } static inline bool is_transparent_hugepage(struct page *page) @@ -1344,7 +1344,7 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf) /* * See do_wp_page(): we can only reuse the folio exclusively if * there are no additional references. Note that we always drain - * the LRU pagevecs immediately after adding a THP. + * the LRU cache immediately after adding a THP. */ if (folio_ref_count(folio) > 1 + folio_test_swapcache(folio) * folio_nr_pages(folio)) @@ -1760,9 +1760,10 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, /* * The destination pmd shouldn't be established, free_pgtables() - * should have release it. + * should have released it; but move_page_tables() might have already + * inserted a page table, if racing against shmem/file collapse. 
*/ - if (WARN_ON(!pmd_none(*new_pmd))) { + if (!pmd_none(*new_pmd)) { VM_BUG_ON(pmd_trans_huge(*new_pmd)); return false; } @@ -2036,6 +2037,8 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, struct mm_struct *mm = vma->vm_mm; pgtable_t pgtable; pmd_t _pmd, old_pmd; + unsigned long addr; + pte_t *pte; int i; /* @@ -2051,17 +2054,20 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, pgtable = pgtable_trans_huge_withdraw(mm, pmd); pmd_populate(mm, &_pmd, pgtable); - for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) { - pte_t *pte, entry; - entry = pfn_pte(my_zero_pfn(haddr), vma->vm_page_prot); + pte = pte_offset_map(&_pmd, haddr); + VM_BUG_ON(!pte); + for (i = 0, addr = haddr; i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) { + pte_t entry; + + entry = pfn_pte(my_zero_pfn(addr), vma->vm_page_prot); entry = pte_mkspecial(entry); if (pmd_uffd_wp(old_pmd)) entry = pte_mkuffd_wp(entry); - pte = pte_offset_map(&_pmd, haddr); - VM_BUG_ON(!pte_none(*pte)); - set_pte_at(mm, haddr, pte, entry); - pte_unmap(pte); + VM_BUG_ON(!pte_none(ptep_get(pte))); + set_pte_at(mm, addr, pte, entry); + pte++; } + pte_unmap(pte - 1); smp_wmb(); /* make pte visible before pmd */ pmd_populate(mm, pmd, pgtable); } @@ -2076,6 +2082,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, bool young, write, soft_dirty, pmd_migration = false, uffd_wp = false; bool anon_exclusive = false, dirty = false; unsigned long addr; + pte_t *pte; int i; VM_BUG_ON(haddr & ~HPAGE_PMD_MASK); @@ -2204,8 +2211,10 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, pgtable = pgtable_trans_huge_withdraw(mm, pmd); pmd_populate(mm, &_pmd, pgtable); + pte = pte_offset_map(&_pmd, haddr); + VM_BUG_ON(!pte); for (i = 0, addr = haddr; i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) { - pte_t entry, *pte; + pte_t entry; /* * Note that NUMA hinting access restrictions are not * transferred to avoid any possibility of altering @@ -2248,11 
+2257,11 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, entry = pte_mkuffd_wp(entry); page_add_anon_rmap(page + i, vma, addr, false); } - pte = pte_offset_map(&_pmd, addr); - BUG_ON(!pte_none(*pte)); + VM_BUG_ON(!pte_none(ptep_get(pte))); set_pte_at(mm, addr, pte, entry); - pte_unmap(pte); + pte++; } + pte_unmap(pte - 1); if (!pmd_migration) page_remove_rmap(page, vma, true); @@ -2792,12 +2801,19 @@ void free_transhuge_page(struct page *page) struct deferred_split *ds_queue = get_deferred_split_queue(folio); unsigned long flags; - spin_lock_irqsave(&ds_queue->split_queue_lock, flags); - if (!list_empty(&folio->_deferred_list)) { - ds_queue->split_queue_len--; - list_del(&folio->_deferred_list); + /* + * At this point, there is no one trying to add the folio to + * deferred_list. If folio is not in deferred_list, it's safe + * to check without acquiring the split_queue_lock. + */ + if (data_race(!list_empty(&folio->_deferred_list))) { + spin_lock_irqsave(&ds_queue->split_queue_lock, flags); + if (!list_empty(&folio->_deferred_list)) { + ds_queue->split_queue_len--; + list_del(&folio->_deferred_list); + } + spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); } - spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); free_compound_page(page); } |