summaryrefslogtreecommitdiff
path: root/mm/huge_memory.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--mm/huge_memory.c122
1 files changed, 77 insertions, 45 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9474dbc150ed..9237976abe72 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -163,12 +163,17 @@ static struct shrinker huge_zero_page_shrinker = {
static ssize_t enabled_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
+ const char *output;
+
if (test_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags))
- return sprintf(buf, "[always] madvise never\n");
- else if (test_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags))
- return sprintf(buf, "always [madvise] never\n");
+ output = "[always] madvise never";
+ else if (test_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
+ &transparent_hugepage_flags))
+ output = "always [madvise] never";
else
- return sprintf(buf, "always madvise [never]\n");
+ output = "always madvise [never]";
+
+ return sysfs_emit(buf, "%s\n", output);
}
static ssize_t enabled_store(struct kobject *kobj,
@@ -200,11 +205,11 @@ static struct kobj_attribute enabled_attr =
__ATTR(enabled, 0644, enabled_show, enabled_store);
ssize_t single_hugepage_flag_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf,
- enum transparent_hugepage_flag flag)
+ struct kobj_attribute *attr, char *buf,
+ enum transparent_hugepage_flag flag)
{
- return sprintf(buf, "%d\n",
- !!test_bit(flag, &transparent_hugepage_flags));
+ return sysfs_emit(buf, "%d\n",
+ !!test_bit(flag, &transparent_hugepage_flags));
}
ssize_t single_hugepage_flag_store(struct kobject *kobj,
@@ -232,15 +237,24 @@ ssize_t single_hugepage_flag_store(struct kobject *kobj,
static ssize_t defrag_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
- return sprintf(buf, "[always] defer defer+madvise madvise never\n");
- if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
- return sprintf(buf, "always [defer] defer+madvise madvise never\n");
- if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
- return sprintf(buf, "always defer [defer+madvise] madvise never\n");
- if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
- return sprintf(buf, "always defer defer+madvise [madvise] never\n");
- return sprintf(buf, "always defer defer+madvise madvise [never]\n");
+ const char *output;
+
+ if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
+ &transparent_hugepage_flags))
+ output = "[always] defer defer+madvise madvise never";
+ else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG,
+ &transparent_hugepage_flags))
+ output = "always [defer] defer+madvise madvise never";
+ else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG,
+ &transparent_hugepage_flags))
+ output = "always defer [defer+madvise] madvise never";
+ else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
+ &transparent_hugepage_flags))
+ output = "always defer defer+madvise [madvise] never";
+ else
+ output = "always defer defer+madvise madvise [never]";
+
+ return sysfs_emit(buf, "%s\n", output);
}
static ssize_t defrag_store(struct kobject *kobj,
@@ -281,10 +295,10 @@ static struct kobj_attribute defrag_attr =
__ATTR(defrag, 0644, defrag_show, defrag_store);
static ssize_t use_zero_page_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
+ struct kobj_attribute *attr, char *buf)
{
return single_hugepage_flag_show(kobj, attr, buf,
- TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
+ TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
}
static ssize_t use_zero_page_store(struct kobject *kobj,
struct kobj_attribute *attr, const char *buf, size_t count)
@@ -296,9 +310,9 @@ static struct kobj_attribute use_zero_page_attr =
__ATTR(use_zero_page, 0644, use_zero_page_show, use_zero_page_store);
static ssize_t hpage_pmd_size_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
+ struct kobj_attribute *attr, char *buf)
{
- return sprintf(buf, "%lu\n", HPAGE_PMD_SIZE);
+ return sysfs_emit(buf, "%lu\n", HPAGE_PMD_SIZE);
}
static struct kobj_attribute hpage_pmd_size_attr =
__ATTR_RO(hpage_pmd_size);
@@ -470,7 +484,7 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
#ifdef CONFIG_MEMCG
static inline struct deferred_split *get_deferred_split_queue(struct page *page)
{
- struct mem_cgroup *memcg = compound_head(page)->mem_cgroup;
+ struct mem_cgroup *memcg = page_memcg(compound_head(page));
struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
if (memcg)
@@ -710,7 +724,6 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
transparent_hugepage_use_zero_page()) {
pgtable_t pgtable;
struct page *zero_page;
- bool set;
vm_fault_t ret;
pgtable = pte_alloc_one(vma->vm_mm);
if (unlikely(!pgtable))
@@ -723,25 +736,25 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
}
vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
ret = 0;
- set = false;
if (pmd_none(*vmf->pmd)) {
ret = check_stable_address_space(vma->vm_mm);
if (ret) {
spin_unlock(vmf->ptl);
+ pte_free(vma->vm_mm, pgtable);
} else if (userfaultfd_missing(vma)) {
spin_unlock(vmf->ptl);
+ pte_free(vma->vm_mm, pgtable);
ret = handle_userfault(vmf, VM_UFFD_MISSING);
VM_BUG_ON(ret & VM_FAULT_FALLBACK);
} else {
set_huge_zero_page(pgtable, vma->vm_mm, vma,
haddr, vmf->pmd, zero_page);
spin_unlock(vmf->ptl);
- set = true;
}
- } else
+ } else {
spin_unlock(vmf->ptl);
- if (!set)
pte_free(vma->vm_mm, pgtable);
+ }
return ret;
}
gfp = alloc_hugepage_direct_gfpmask(vma);
@@ -2322,7 +2335,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
static void unmap_page(struct page *page)
{
- enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
+ enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK |
TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
bool unmap_success;
@@ -2346,6 +2359,27 @@ static void remap_page(struct page *page, unsigned int nr)
}
}
+static void lru_add_page_tail(struct page *head, struct page *tail,
+ struct lruvec *lruvec, struct list_head *list)
+{
+ VM_BUG_ON_PAGE(!PageHead(head), head);
+ VM_BUG_ON_PAGE(PageCompound(tail), head);
+ VM_BUG_ON_PAGE(PageLRU(tail), head);
+ lockdep_assert_held(&lruvec->lru_lock);
+
+ if (list) {
+ /* page reclaim is reclaiming a huge page */
+ VM_WARN_ON(PageLRU(head));
+ get_page(tail);
+ list_add_tail(&tail->lru, list);
+ } else {
+ /* head is still on lru (and we have it frozen) */
+ VM_WARN_ON(!PageLRU(head));
+ SetPageLRU(tail);
+ list_add_tail(&tail->lru, &head->lru);
+ }
+}
+
static void __split_huge_page_tail(struct page *head, int tail,
struct lruvec *lruvec, struct list_head *list)
{
@@ -2357,7 +2391,7 @@ static void __split_huge_page_tail(struct page *head, int tail,
* Clone page flags before unfreezing refcount.
*
* After successful get_page_unless_zero() might follow flags change,
- * for exmaple lock_page() which set PG_waiters.
+ * for example lock_page() which set PG_waiters.
*/
page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
page_tail->flags |= (head->flags &
@@ -2412,18 +2446,15 @@ static void __split_huge_page_tail(struct page *head, int tail,
}
static void __split_huge_page(struct page *page, struct list_head *list,
- pgoff_t end, unsigned long flags)
+ pgoff_t end)
{
struct page *head = compound_head(page);
- pg_data_t *pgdat = page_pgdat(head);
struct lruvec *lruvec;
struct address_space *swap_cache = NULL;
unsigned long offset = 0;
unsigned int nr = thp_nr_pages(head);
int i;
- lruvec = mem_cgroup_page_lruvec(head, pgdat);
-
/* complete memcg works before add pages to LRU */
mem_cgroup_split_huge_fixup(head);
@@ -2435,6 +2466,9 @@ static void __split_huge_page(struct page *page, struct list_head *list,
xa_lock(&swap_cache->i_pages);
}
+ /* lock lru list/PageCompound, ref freezed by page_ref_freeze */
+ lruvec = lock_page_lruvec(head);
+
for (i = nr - 1; i >= 1; i--) {
__split_huge_page_tail(head, i, lruvec, list);
/* Some pages can be beyond i_size: drop them from page cache */
@@ -2454,6 +2488,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
}
ClearPageCompound(head);
+ unlock_page_lruvec(lruvec);
+ /* Caller disabled irqs, so they are still disabled here */
split_page_owner(head, nr);
@@ -2471,8 +2507,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
page_ref_add(head, 2);
xa_unlock(&head->mapping->i_pages);
}
-
- spin_unlock_irqrestore(&pgdat->lru_lock, flags);
+ local_irq_enable();
remap_page(head, nr);
@@ -2618,12 +2653,10 @@ bool can_split_huge_page(struct page *page, int *pextra_pins)
int split_huge_page_to_list(struct page *page, struct list_head *list)
{
struct page *head = compound_head(page);
- struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
struct deferred_split *ds_queue = get_deferred_split_queue(head);
struct anon_vma *anon_vma = NULL;
struct address_space *mapping = NULL;
int count, mapcount, extra_pins, ret;
- unsigned long flags;
pgoff_t end;
VM_BUG_ON_PAGE(is_huge_zero_page(head), head);
@@ -2684,9 +2717,8 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
unmap_page(head);
VM_BUG_ON_PAGE(compound_mapcount(head), head);
- /* prevent PageLRU to go away from under us, and freeze lru stats */
- spin_lock_irqsave(&pgdata->lru_lock, flags);
-
+ /* block interrupt reentry in xa_lock and spinlock */
+ local_irq_disable();
if (mapping) {
XA_STATE(xas, &mapping->i_pages, page_index(head));
@@ -2711,12 +2743,12 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
spin_unlock(&ds_queue->split_queue_lock);
if (mapping) {
if (PageSwapBacked(head))
- __dec_node_page_state(head, NR_SHMEM_THPS);
+ __dec_lruvec_page_state(head, NR_SHMEM_THPS);
else
- __dec_node_page_state(head, NR_FILE_THPS);
+ __dec_lruvec_page_state(head, NR_FILE_THPS);
}
- __split_huge_page(page, list, end, flags);
+ __split_huge_page(page, list, end);
ret = 0;
} else {
if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
@@ -2730,7 +2762,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
spin_unlock(&ds_queue->split_queue_lock);
fail: if (mapping)
xa_unlock(&mapping->i_pages);
- spin_unlock_irqrestore(&pgdata->lru_lock, flags);
+ local_irq_enable();
remap_page(head, thp_nr_pages(head));
ret = -EBUSY;
}
@@ -2765,7 +2797,7 @@ void deferred_split_huge_page(struct page *page)
{
struct deferred_split *ds_queue = get_deferred_split_queue(page);
#ifdef CONFIG_MEMCG
- struct mem_cgroup *memcg = compound_head(page)->mem_cgroup;
+ struct mem_cgroup *memcg = page_memcg(compound_head(page));
#endif
unsigned long flags;