Commit a2a84e36, authored by Rui Wang, committed by Huacai Chen
Browse files

LoongArch: mm: Refactor TLB exception handlers



This patch simplifies TLB load, store and modify exception handlers:

1. Reduce the number of instructions, such as ALU/CSR operations and memory accesses;
2. Execute tlb search instruction only in the fast path;
3. Return directly from the fast path for both normal and huge pages;
4. Re-tab the assembly for better vertical alignment.

It also fixes a concurrent-modification issue in the fast path for huge pages.

The issue occurs in the following sequence:

   CPU-1 (In TLB exception)         CPU-2 (In THP splitting)
1: Load PMD entry (HUGE=1)
2: Goto huge path
3:                                  Store PMD entry (HUGE=0)
4: Reload PMD entry (HUGE=0)
5: Fill TLB entry (PA is incorrect)

This patch also slightly improves the TLB processing performance:

* Normal pages: 2.15%, Huge pages: 1.70%.

  #include <stdio.h>
  #include <stdlib.h>
  #include <unistd.h>
  #include <sys/mman.h>

  /*
   * Page-touch test program.
   *
   * Usage: PROG MEM_SIZE [HUGE]
   *
   * Maps MEM_SIZE bytes (decimal) of private anonymous read-only memory,
   * adding MAP_HUGETLB when any second argument is present, reads one int
   * at every base-page-sized step of the mapping, unmaps it, and repeats
   * the whole cycle ten times.
   *
   * Returns 0 on success, or -1 on a usage error, when the page size
   * cannot be queried, or when mmap() fails.
   */
  int main(int argc, char *argv[])
  {
        size_t page_size;
        size_t mem_size;
        size_t off;
        long page_size_raw;
        void *base;
        int flags;
        int i;

        if (argc < 2) {
                fprintf(stderr, "%s MEM_SIZE [HUGE]\n", argv[0]);
                return -1;
        }

        /*
         * sysconf() reports failure as -1; check before converting to
         * size_t, otherwise the stride below would wrap to a huge value
         * and only the first page would ever be touched.
         */
        page_size_raw = sysconf(_SC_PAGESIZE);
        if (page_size_raw <= 0) {
                fprintf(stderr, "Query page size failed!\n");
                return -1;
        }
        page_size = (size_t)page_size_raw;

        flags = MAP_PRIVATE | MAP_ANONYMOUS;
        mem_size = strtoul(argv[1], NULL, 10);
        if (argc > 2)
                flags |= MAP_HUGETLB;

        for (i = 0; i < 10; i++) {
                base = mmap(NULL, mem_size, PROT_READ, flags, -1, 0);
                if (base == MAP_FAILED) {
                        fprintf(stderr, "Map memory failed!\n");
                        return -1;
                }

                /*
                 * Touch one int per base page. The volatile access keeps
                 * the otherwise unused read from being optimized away.
                 * Byte offsets are applied through char *, because
                 * arithmetic on void * is not valid ISO C.
                 */
                for (off = 0; off < mem_size; off += page_size)
                        *(volatile int *)((char *)base + off);

                munmap(base, mem_size);
        }

        return 0;
  }

Signed-off-by: Rui Wang <wangrui@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
parent 59b3d4a9
Loading
Loading
Loading
Loading
+247 −290
Original line number Diff line number Diff line
@@ -10,6 +10,11 @@
#include <asm/regdef.h>
#include <asm/stackframe.h>

/*
 * Width, in bits, of the table index at each page-table level.
 *
 * The table-walk code in the handlers uses these as bstrpick.d field
 * widths: bits (<LEVEL>_SHIFT + <LEVEL>_BITS - 1) down to <LEVEL>_SHIFT of
 * the address held in t0 are extracted as the index, which alsl.d then
 * scales by 8 and adds to the table base.
 *
 * NOTE(review): PAGE_SHIFT - 3 presumably corresponds to one page worth of
 * 8-byte entries per table -- confirm against the PTRS_PER_* definitions.
 */
#define PTRS_PER_PGD_BITS	(PAGE_SHIFT - 3)
#define PTRS_PER_PUD_BITS	(PAGE_SHIFT - 3)
#define PTRS_PER_PMD_BITS	(PAGE_SHIFT - 3)
#define PTRS_PER_PTE_BITS	(PAGE_SHIFT - 3)

	.macro tlb_do_page_fault, write
	SYM_FUNC_START(tlb_do_page_fault_\write)
	SAVE_ALL
@@ -52,25 +57,17 @@ SYM_FUNC_START(handle_tlb_load)

vmalloc_done_load:
	/* Get PGD offset in bytes */
	srli.d	t0, t0, PGDIR_SHIFT
	andi	t0, t0, (PTRS_PER_PGD - 1)
	slli.d	t0, t0, 3
	add.d	t1, t1, t0
	bstrpick.d	ra, t0, PTRS_PER_PGD_BITS + PGDIR_SHIFT - 1, PGDIR_SHIFT
	alsl.d		t1, ra, t1, 3
#if CONFIG_PGTABLE_LEVELS > 3
	csrrd	t0, LOONGARCH_CSR_BADV
	ld.d		t1, t1, 0
	srli.d	t0, t0, PUD_SHIFT
	andi	t0, t0, (PTRS_PER_PUD - 1)
	slli.d	t0, t0, 3
	add.d	t1, t1, t0
	bstrpick.d	ra, t0, PTRS_PER_PUD_BITS + PUD_SHIFT - 1, PUD_SHIFT
	alsl.d		t1, ra, t1, 3
#endif
#if CONFIG_PGTABLE_LEVELS > 2
	csrrd	t0, LOONGARCH_CSR_BADV
	ld.d		t1, t1, 0
	srli.d	t0, t0, PMD_SHIFT
	andi	t0, t0, (PTRS_PER_PMD - 1)
	slli.d	t0, t0, 3
	add.d	t1, t1, t0
	bstrpick.d	ra, t0, PTRS_PER_PMD_BITS + PMD_SHIFT - 1, PMD_SHIFT
	alsl.d		t1, ra, t1, 3
#endif
	ld.d		ra, t1, 0

@@ -79,27 +76,20 @@ vmalloc_done_load:
	 * instead contains the tlb pte. Check the PAGE_HUGE bit and
	 * see if we need to jump to huge tlb processing.
	 */
	andi	t0, ra, _PAGE_HUGE
	bnez	t0, tlb_huge_update_load
	rotri.d		ra, ra, _PAGE_HUGE_SHIFT + 1
	bltz		ra, tlb_huge_update_load

	csrrd	t0, LOONGARCH_CSR_BADV
	srli.d	t0, t0, PAGE_SHIFT
	andi	t0, t0, (PTRS_PER_PTE - 1)
	slli.d	t0, t0, _PTE_T_LOG2
	add.d	t1, ra, t0
	rotri.d		ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1)
	bstrpick.d	t0, t0, PTRS_PER_PTE_BITS + PAGE_SHIFT - 1, PAGE_SHIFT
	alsl.d		t1, t0, ra, _PTE_T_LOG2

#ifdef CONFIG_SMP
smp_pgtable_change_load:
#endif
#ifdef CONFIG_SMP
	ll.d		t0, t1, 0
#else
	ld.d		t0, t1, 0
#endif
	tlbsrch

	srli.d	ra, t0, _PAGE_PRESENT_SHIFT
	andi	ra, ra, 1
	andi		ra, t0, _PAGE_PRESENT
	beqz		ra, nopage_tlb_load

	ori		t0, t0, _PAGE_VALID
@@ -109,47 +99,44 @@ smp_pgtable_change_load:
#else
	st.d		t0, t1, 0
#endif
	ori	t1, t1, 8
	xori	t1, t1, 8
	tlbsrch
	bstrins.d	t1, zero, 3, 3
	ld.d		t0, t1, 0
	ld.d		t1, t1, 8
	csrwr		t0, LOONGARCH_CSR_TLBELO0
	csrwr		t1, LOONGARCH_CSR_TLBELO1
	tlbwr
leave_load:

	csrrd		t0, EXCEPTION_KS0
	csrrd		t1, EXCEPTION_KS1
	csrrd		ra, EXCEPTION_KS2
	ertn

#ifdef CONFIG_64BIT
vmalloc_load:
	la.abs		t1, swapper_pg_dir
	b		vmalloc_done_load
#endif

	/*
	 * This is the entry point when build_tlbchange_handler_head
	 * spots a huge page.
	 */
	/* This is the entry point of a huge page. */
tlb_huge_update_load:
#ifdef CONFIG_SMP
	ll.d	t0, t1, 0
#else
	ld.d	t0, t1, 0
	ll.d		ra, t1, 0
#endif
	srli.d	ra, t0, _PAGE_PRESENT_SHIFT
	andi	ra, ra, 1
	beqz	ra, nopage_tlb_load
	tlbsrch
	andi		t0, ra, _PAGE_PRESENT
	beqz		t0, nopage_tlb_load

	ori	t0, t0, _PAGE_VALID
#ifdef CONFIG_SMP
	ori		t0, ra, _PAGE_VALID
	sc.d		t0, t1, 0
	beqz		t0, tlb_huge_update_load
	ld.d	t0, t1, 0
	ori		t0, ra, _PAGE_VALID
#else
	rotri.d		ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1)
	ori		t0, ra, _PAGE_VALID
	st.d		t0, t1, 0
#endif
	tlbsrch
	addu16i.d	t1, zero, -(CSR_TLBIDX_EHINV >> 16)
	addi.d		ra, t1, 0
	csrxchg		ra, t1, LOONGARCH_CSR_TLBIDX
@@ -173,9 +160,8 @@ tlb_huge_update_load:
	srli.d		t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT)
	or		t0, t0, t1

	addi.d	ra, t0, 0
	csrwr	t0, LOONGARCH_CSR_TLBELO0
	addi.d	t0, ra, 0
	move		ra, t0
	csrwr		ra, LOONGARCH_CSR_TLBELO0

	/* Convert to entrylo1 */
	addi.d		t1, zero, 1
@@ -194,6 +180,11 @@ tlb_huge_update_load:
	addu16i.d	t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
	csrxchg		t1, t0, LOONGARCH_CSR_TLBIDX

	csrrd		t0, EXCEPTION_KS0
	csrrd		t1, EXCEPTION_KS1
	csrrd		ra, EXCEPTION_KS2
	ertn

nopage_tlb_load:
	dbar		0
	csrrd		ra, EXCEPTION_KS2
@@ -215,26 +206,17 @@ SYM_FUNC_START(handle_tlb_store)

vmalloc_done_store:
	/* Get PGD offset in bytes */
	srli.d	t0, t0, PGDIR_SHIFT
	andi	t0, t0, (PTRS_PER_PGD - 1)
	slli.d	t0, t0, 3
	add.d	t1, t1, t0

	bstrpick.d	ra, t0, PTRS_PER_PGD_BITS + PGDIR_SHIFT - 1, PGDIR_SHIFT
	alsl.d		t1, ra, t1, 3
#if CONFIG_PGTABLE_LEVELS > 3
	csrrd	t0, LOONGARCH_CSR_BADV
	ld.d		t1, t1, 0
	srli.d	t0, t0, PUD_SHIFT
	andi	t0, t0, (PTRS_PER_PUD - 1)
	slli.d	t0, t0, 3
	add.d	t1, t1, t0
	bstrpick.d	ra, t0, PTRS_PER_PUD_BITS + PUD_SHIFT - 1, PUD_SHIFT
	alsl.d		t1, ra, t1, 3
#endif
#if CONFIG_PGTABLE_LEVELS > 2
	csrrd	t0, LOONGARCH_CSR_BADV
	ld.d		t1, t1, 0
	srli.d	t0, t0, PMD_SHIFT
	andi	t0, t0, (PTRS_PER_PMD - 1)
	slli.d	t0, t0, 3
	add.d	t1, t1, t0
	bstrpick.d	ra, t0, PTRS_PER_PMD_BITS + PMD_SHIFT - 1, PMD_SHIFT
	alsl.d		t1, ra, t1, 3
#endif
	ld.d		ra, t1, 0

@@ -243,28 +225,21 @@ vmalloc_done_store:
	 * instead contains the tlb pte. Check the PAGE_HUGE bit and
	 * see if we need to jump to huge tlb processing.
	 */
	andi	t0, ra, _PAGE_HUGE
	bnez	t0, tlb_huge_update_store
	rotri.d		ra, ra, _PAGE_HUGE_SHIFT + 1
	bltz		ra, tlb_huge_update_store

	csrrd	t0, LOONGARCH_CSR_BADV
	srli.d	t0, t0, PAGE_SHIFT
	andi	t0, t0, (PTRS_PER_PTE - 1)
	slli.d	t0, t0, _PTE_T_LOG2
	add.d	t1, ra, t0
	rotri.d		ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1)
	bstrpick.d	t0, t0, PTRS_PER_PTE_BITS + PAGE_SHIFT - 1, PAGE_SHIFT
	alsl.d		t1, t0, ra, _PTE_T_LOG2

#ifdef CONFIG_SMP
smp_pgtable_change_store:
#endif
#ifdef CONFIG_SMP
	ll.d		t0, t1, 0
#else
	ld.d		t0, t1, 0
#endif
	tlbsrch

	srli.d	ra, t0, _PAGE_PRESENT_SHIFT
	andi	ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
	xori	ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
	andi		ra, t0, _PAGE_PRESENT | _PAGE_WRITE
	xori		ra, ra, _PAGE_PRESENT | _PAGE_WRITE
	bnez		ra, nopage_tlb_store

	ori		t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
@@ -274,50 +249,45 @@ smp_pgtable_change_store:
#else
	st.d		t0, t1, 0
#endif

	ori	t1, t1, 8
	xori	t1, t1, 8
	tlbsrch
	bstrins.d	t1, zero, 3, 3
	ld.d		t0, t1, 0
	ld.d		t1, t1, 8
	csrwr		t0, LOONGARCH_CSR_TLBELO0
	csrwr		t1, LOONGARCH_CSR_TLBELO1
	tlbwr
leave_store:

	csrrd		t0, EXCEPTION_KS0
	csrrd		t1, EXCEPTION_KS1
	csrrd		ra, EXCEPTION_KS2
	ertn

#ifdef CONFIG_64BIT
vmalloc_store:
	la.abs		t1, swapper_pg_dir
	b		vmalloc_done_store
#endif

	/*
	 * This is the entry point when build_tlbchange_handler_head
	 * spots a huge page.
	 */
	/* This is the entry point of a huge page. */
tlb_huge_update_store:
#ifdef CONFIG_SMP
	ll.d	t0, t1, 0
#else
	ld.d	t0, t1, 0
	ll.d		ra, t1, 0
#endif
	srli.d	ra, t0, _PAGE_PRESENT_SHIFT
	andi	ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
	xori	ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
	bnez	ra, nopage_tlb_store

	tlbsrch
	ori	t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
	andi		t0, ra, _PAGE_PRESENT | _PAGE_WRITE
	xori		t0, t0, _PAGE_PRESENT | _PAGE_WRITE
	bnez		t0, nopage_tlb_store

#ifdef CONFIG_SMP
	ori		t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
	sc.d		t0, t1, 0
	beqz		t0, tlb_huge_update_store
	ld.d	t0, t1, 0
	ori		t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
#else
	rotri.d		ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1)
	ori		t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
	st.d		t0, t1, 0
#endif
	tlbsrch
	addu16i.d	t1, zero, -(CSR_TLBIDX_EHINV >> 16)
	addi.d		ra, t1, 0
	csrxchg		ra, t1, LOONGARCH_CSR_TLBIDX
@@ -340,9 +310,8 @@ tlb_huge_update_store:
	srli.d		t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT)
	or		t0, t0, t1

	addi.d	ra, t0, 0
	csrwr	t0, LOONGARCH_CSR_TLBELO0
	addi.d	t0, ra, 0
	move		ra, t0
	csrwr		ra, LOONGARCH_CSR_TLBELO0

	/* Convert to entrylo1 */
	addi.d		t1, zero, 1
@@ -362,6 +331,11 @@ tlb_huge_update_store:
	addu16i.d	t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
	csrxchg		t1, t0, LOONGARCH_CSR_TLBIDX

	csrrd		t0, EXCEPTION_KS0
	csrrd		t1, EXCEPTION_KS1
	csrrd		ra, EXCEPTION_KS2
	ertn

nopage_tlb_store:
	dbar		0
	csrrd		ra, EXCEPTION_KS2
@@ -383,25 +357,17 @@ SYM_FUNC_START(handle_tlb_modify)

vmalloc_done_modify:
	/* Get PGD offset in bytes */
	srli.d	t0, t0, PGDIR_SHIFT
	andi	t0, t0, (PTRS_PER_PGD - 1)
	slli.d	t0, t0, 3
	add.d	t1, t1, t0
	bstrpick.d	ra, t0, PTRS_PER_PGD_BITS + PGDIR_SHIFT - 1, PGDIR_SHIFT
	alsl.d		t1, ra, t1, 3
#if CONFIG_PGTABLE_LEVELS > 3
	csrrd	t0, LOONGARCH_CSR_BADV
	ld.d		t1, t1, 0
	srli.d	t0, t0, PUD_SHIFT
	andi	t0, t0, (PTRS_PER_PUD - 1)
	slli.d	t0, t0, 3
	add.d	t1, t1, t0
	bstrpick.d	ra, t0, PTRS_PER_PUD_BITS + PUD_SHIFT - 1, PUD_SHIFT
	alsl.d		t1, ra, t1, 3
#endif
#if CONFIG_PGTABLE_LEVELS > 2
	csrrd	t0, LOONGARCH_CSR_BADV
	ld.d		t1, t1, 0
	srli.d	t0, t0, PMD_SHIFT
	andi	t0, t0, (PTRS_PER_PMD - 1)
	slli.d	t0, t0, 3
	add.d	t1, t1, t0
	bstrpick.d	ra, t0, PTRS_PER_PMD_BITS + PMD_SHIFT - 1, PMD_SHIFT
	alsl.d		t1, ra, t1, 3
#endif
	ld.d		ra, t1, 0

@@ -410,27 +376,20 @@ vmalloc_done_modify:
	 * instead contains the tlb pte. Check the PAGE_HUGE bit and
	 * see if we need to jump to huge tlb processing.
	 */
	andi	t0, ra, _PAGE_HUGE
	bnez	t0, tlb_huge_update_modify
	rotri.d		ra, ra, _PAGE_HUGE_SHIFT + 1
	bltz		ra, tlb_huge_update_modify

	csrrd	t0, LOONGARCH_CSR_BADV
	srli.d	t0, t0, PAGE_SHIFT
	andi	t0, t0, (PTRS_PER_PTE - 1)
	slli.d	t0, t0, _PTE_T_LOG2
	add.d	t1, ra, t0
	rotri.d		ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1)
	bstrpick.d	t0, t0, PTRS_PER_PTE_BITS + PAGE_SHIFT - 1, PAGE_SHIFT
	alsl.d		t1, t0, ra, _PTE_T_LOG2

#ifdef CONFIG_SMP
smp_pgtable_change_modify:
#endif
#ifdef CONFIG_SMP
	ll.d		t0, t1, 0
#else
	ld.d		t0, t1, 0
#endif
	tlbsrch

	srli.d	ra, t0, _PAGE_WRITE_SHIFT
	andi	ra, ra, 1
	andi		ra, t0, _PAGE_WRITE
	beqz		ra, nopage_tlb_modify

	ori		t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
@@ -440,47 +399,41 @@ smp_pgtable_change_modify:
#else
	st.d		t0, t1, 0
#endif
	ori	t1, t1, 8
	xori	t1, t1, 8
	tlbsrch
	bstrins.d	t1, zero, 3, 3
	ld.d		t0, t1, 0
	ld.d		t1, t1, 8
	csrwr		t0, LOONGARCH_CSR_TLBELO0
	csrwr		t1, LOONGARCH_CSR_TLBELO1
	tlbwr
leave_modify:

	csrrd		t0, EXCEPTION_KS0
	csrrd		t1, EXCEPTION_KS1
	csrrd		ra, EXCEPTION_KS2
	ertn

#ifdef CONFIG_64BIT
vmalloc_modify:
	la.abs		t1, swapper_pg_dir
	b		vmalloc_done_modify
#endif

	/*
	 * This is the entry point when
	 * build_tlbchange_handler_head spots a huge page.
	 */
	/* This is the entry point of a huge page. */
tlb_huge_update_modify:
#ifdef CONFIG_SMP
	ll.d	t0, t1, 0
#else
	ld.d	t0, t1, 0
	ll.d		ra, t1, 0
#endif

	srli.d	ra, t0, _PAGE_WRITE_SHIFT
	andi	ra, ra, 1
	beqz	ra, nopage_tlb_modify

	tlbsrch
	ori	t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
	andi		t0, ra, _PAGE_WRITE
	beqz		t0, nopage_tlb_modify

#ifdef CONFIG_SMP
	ori		t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
	sc.d		t0, t1, 0
	beqz		t0, tlb_huge_update_modify
	ld.d	t0, t1, 0
	ori		t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
#else
	rotri.d		ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1)
	ori		t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
	st.d		t0, t1, 0
#endif
	/*
@@ -499,9 +452,8 @@ tlb_huge_update_modify:
	srli.d		t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT)
	or		t0, t0, t1

	addi.d	ra, t0, 0
	csrwr	t0, LOONGARCH_CSR_TLBELO0
	addi.d	t0, ra, 0
	move		ra, t0
	csrwr		ra, LOONGARCH_CSR_TLBELO0

	/* Convert to entrylo1 */
	addi.d		t1, zero, 1
@@ -521,6 +473,11 @@ tlb_huge_update_modify:
	addu16i.d	t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
	csrxchg		t1, t0, LOONGARCH_CSR_TLBIDX

	csrrd		t0, EXCEPTION_KS0
	csrrd		t1, EXCEPTION_KS1
	csrrd		ra, EXCEPTION_KS2
	ertn

nopage_tlb_modify:
	dbar		0
	csrrd		ra, EXCEPTION_KS2