fs: avoid mmap sem relocks when coredumping with many missing pages
Dumping processes with large allocated and mostly not-faulted areas is
very slow.
Borrowing a test case from Tavian Barnes:
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

int main(void) {
	char *mem = mmap(NULL, 1ULL << 40, PROT_READ | PROT_WRITE,
			 MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE, -1, 0);
	printf("%p %m\n", mem);
	if (mem != MAP_FAILED) {
		mem[0] = 1;
	}
	abort();
}
That's 1TB of almost completely not-populated area.
On my test box it takes 13-14 seconds to dump.
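(Presumably the numbers come from building the test with gcc, allowing core files via "ulimit -c unlimited", and timing the core write triggered by abort(); only a single page of the 1TB mapping is ever faulted in.)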
The profile shows:
  - 99.89% 0.00% a.out
       entry_SYSCALL_64_after_hwframe
       do_syscall_64
       syscall_exit_to_user_mode
       arch_do_signal_or_restart
     - get_signal
        - 99.89% do_coredump
           - 99.88% elf_core_dump
              - dump_user_range
                 - 98.12% get_dump_page
                    - 64.19% __get_user_pages
                       - 40.92% gup_vma_lookup
                          - find_vma
                             - mt_find
                                  4.21% __rcu_read_lock
                                  1.33% __rcu_read_unlock
                       - 3.14% check_vma_flags
                            0.68% vma_is_secretmem
                         0.61% __cond_resched
                         0.60% vma_pgtable_walk_end
                         0.59% vma_pgtable_walk_begin
                         0.58% no_page_table
                    - 15.13% down_read_killable
                         0.69% __cond_resched
                      13.84% up_read
                      0.58% __cond_resched
Almost 29% of the time is spent relocking the mmap semaphore between
calls to get_dump_page() which find nothing.
Whacking that results in times of 10 seconds (down from 13-14).
While here make the thing killable.
The real problem is the page-sized iteration and the real fix would
patch it up instead. It is left as an exercise for the mm-familiar
reader.
Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Link: https://lore.kernel.org/r/20250119103205.2172432-1-mjguzik@gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
parent f9835fa147
commit d6ff4c8f65
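Before the diff itself, a condensed sketch of the locking pattern the patched dump_user_range() follows. This is illustrative pseudocode distilled from the fs/coredump.c hunks below, not compilable on its own: emit_page() is a hypothetical stand-in for the dump_emit_page()/dump_page_copy() path, and the dump_interrupted()/need_resched() handling is trimmed.

/* Sketch only -- see the real fs/coredump.c hunks below. */
static int dump_user_range_sketch(unsigned long start, unsigned long len)
{
	unsigned long addr;
	int locked = 0, ret = 0;

	for (addr = start; addr < start + len; addr += PAGE_SIZE) {
		/* Take the mmap sem once and keep it across iterations. */
		if (!locked) {
			if (mmap_read_lock_killable(current->mm))
				goto out;	/* killable: a fatal signal aborts the dump */
			locked = 1;
		}

		/*
		 * get_dump_page() no longer locks/unlocks internally; if it
		 * had to drop the lock it says so through &locked, and the
		 * next iteration relocks.
		 */
		struct page *page = get_dump_page(addr, &locked);
		if (page) {
			/* Drop the lock only when there is real work, since
			 * writing the page out may block. */
			if (locked) {
				mmap_read_unlock(current->mm);
				locked = 0;
			}
			emit_page(page);	/* hypothetical stand-in */
		}
		/* A hole costs no unlock/relock round trip anymore. */
	}
	ret = 1;
out:
	if (locked)
		mmap_read_unlock(current->mm);
	return ret;
}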
--- a/arch/arm64/kernel/elfcore.c
+++ b/arch/arm64/kernel/elfcore.c
@@ -27,9 +27,10 @@ static int mte_dump_tag_range(struct coredump_params *cprm,
 	int ret = 1;
 	unsigned long addr;
 	void *tags = NULL;
+	int locked = 0;
 
 	for (addr = start; addr < start + len; addr += PAGE_SIZE) {
-		struct page *page = get_dump_page(addr);
+		struct page *page = get_dump_page(addr, &locked);
 
 		/*
 		 * get_dump_page() returns NULL when encountering an empty
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -925,14 +925,23 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start,
 {
 	unsigned long addr;
 	struct page *dump_page;
+	int locked, ret;
 
 	dump_page = dump_page_alloc();
 	if (!dump_page)
 		return 0;
 
+	ret = 0;
+	locked = 0;
 	for (addr = start; addr < start + len; addr += PAGE_SIZE) {
 		struct page *page;
 
+		if (!locked) {
+			if (mmap_read_lock_killable(current->mm))
+				goto out;
+			locked = 1;
+		}
+
 		/*
 		 * To avoid having to allocate page tables for virtual address
 		 * ranges that have never been used yet, and also to make it
@@ -940,21 +949,38 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start,
 		 * NULL when encountering an empty page table entry that would
 		 * otherwise have been filled with the zero page.
 		 */
-		page = get_dump_page(addr);
+		page = get_dump_page(addr, &locked);
 		if (page) {
+			if (locked) {
+				mmap_read_unlock(current->mm);
+				locked = 0;
+			}
 			int stop = !dump_emit_page(cprm, dump_page_copy(page, dump_page));
 			put_page(page);
-			if (stop) {
-				dump_page_free(dump_page);
-				return 0;
-			}
+			if (stop)
+				goto out;
 		} else {
 			dump_skip(cprm, PAGE_SIZE);
 		}
+
+		if (dump_interrupted())
+			goto out;
+
+		if (!need_resched())
+			continue;
+		if (locked) {
+			mmap_read_unlock(current->mm);
+			locked = 0;
+		}
 		cond_resched();
 	}
+	ret = 1;
+out:
+	if (locked)
+		mmap_read_unlock(current->mm);
+
 	dump_page_free(dump_page);
-	return 1;
+	return ret;
 }
 #endif
 
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2549,7 +2549,7 @@ int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
 			struct task_struct *task, bool bypass_rlim);
 
 struct kvec;
-struct page *get_dump_page(unsigned long addr);
+struct page *get_dump_page(unsigned long addr, int *locked);
 
 bool folio_mark_dirty(struct folio *folio);
 bool folio_mark_dirty_lock(struct folio *folio);

--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2254,6 +2254,7 @@ EXPORT_SYMBOL(fault_in_readable);
 /**
  * get_dump_page() - pin user page in memory while writing it to core dump
  * @addr: user address
+ * @locked: a pointer to an int denoting whether the mmap sem is held
  *
  * Returns struct page pointer of user page pinned for dump,
  * to be freed afterwards by put_page().
@@ -2266,13 +2267,12 @@
  * Called without mmap_lock (takes and releases the mmap_lock by itself).
  */
 #ifdef CONFIG_ELF_CORE
-struct page *get_dump_page(unsigned long addr)
+struct page *get_dump_page(unsigned long addr, int *locked)
 {
 	struct page *page;
-	int locked = 0;
 	int ret;
 
-	ret = __get_user_pages_locked(current->mm, addr, 1, &page, &locked,
+	ret = __get_user_pages_locked(current->mm, addr, 1, &page, locked,
 				      FOLL_FORCE | FOLL_DUMP | FOLL_GET);
 	return (ret == 1) ? page : NULL;
 }
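One note on the locked convention threaded through here, stated as a reading of __get_user_pages_locked() rather than anything this commit spells out: *locked == 1 means the caller already holds the mmap read lock and the helper may clear the flag if it has to drop the lock internally, while *locked == 0 makes the helper take and release the lock by itself, which is what the old per-call get_dump_page() relied on.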