Newer
Older
* For now this makes the whole process use 4k pages.
*/
#ifdef CONFIG_PPC_64K_PAGES
/*
 * Switch the slice containing @addr in @mm to 4K pages.
 *
 * Returns immediately when the slice already uses MMU_PAGE_4K.  Otherwise
 * the slice page size is changed and, if @mm is the current mm and the
 * PACA still reports a different page size for @addr, the mm context is
 * copied into the PACA again and the SLB is flushed and rebolted.
 */
void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
{
	if (get_slice_psize(mm, addr) == MMU_PAGE_4K)
		return;
	slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K);
	if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) {
		copy_mm_to_paca(&mm->context);
		slb_flush_and_rebolt();
	}
}
#endif /* CONFIG_PPC_64K_PAGES */
#ifdef CONFIG_PPC_SUBPAGE_PROT
/*
 * This looks up a 2-bit protection code for a 4k subpage of a 64k page.
 * Userspace sets the subpage permissions using the subpage_prot system call.
 *
 * Result is 0: full permissions, _PAGE_WRITE: read-only,
 * _PAGE_RWX: no access.
 */
static int subpage_protection(struct mm_struct *mm, unsigned long ea)
{
	struct subpage_prot_table *spt = &mm->context.spt;
	u32 spp = 0;
	u32 **sbpm, *sbpp;

	/* Addresses at or above maxaddr get full permissions. */
	if (ea >= spt->maxaddr)
		return 0;
	if (ea < 0x100000000UL) {
		/* addresses below 4GB use spt->low_prot */
		sbpm = spt->low_prot;
	} else {
		sbpm = spt->protptrs[ea >> SBP_L3_SHIFT];
		if (!sbpm)
			return 0;
	}
	sbpp = sbpm[(ea >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1)];
	if (!sbpp)
		return 0;
	spp = sbpp[(ea >> PAGE_SHIFT) & (SBP_L1_COUNT - 1)];

	/* extract 2-bit bitfield for this 4k subpage */
	spp >>= 30 - 2 * ((ea >> 12) & 0xf);

	/*
	 * 0 -> full permission
	 * 1 -> Read only
	 * 2 -> no access.
	 * We return the flag that need to be cleared.
	 */
	spp = ((spp & 2) ? _PAGE_RWX : 0) | ((spp & 1) ? _PAGE_WRITE : 0);
	return spp;
}

#else /* CONFIG_PPC_SUBPAGE_PROT */
/* Without subpage protection support no permission is ever removed. */
static inline int subpage_protection(struct mm_struct *mm, unsigned long ea)
{
	return 0;
}
#endif
/*
 * Log the details of a failed hash insertion.
 *
 * Output is subject to printk_ratelimit(); when the limit has been hit
 * nothing is printed.  Two pr_info() lines are emitted: the faulting
 * address, access mask and current task name, then the trap, VSID,
 * segment size, both page sizes and the PTE value.
 */
void hash_failure_debug(unsigned long ea, unsigned long access,
			unsigned long vsid, unsigned long trap,
			int ssize, int psize, int lpsize, unsigned long pte)
{
	if (printk_ratelimit()) {
		pr_info("mm: Hashing failure ! EA=0x%lx access=0x%lx current=%s\n",
			ea, access, current->comm);
		pr_info(" trap=0x%lx vsid=0x%lx ssize=%d base psize=%d psize %d pte=0x%lx\n",
			trap, vsid, ssize, psize, lpsize, pte);
	}
}
/*
 * Bring the PACA back in line with the page size about to be hashed.
 *
 * User region: if @psize differs from what the PACA reports for @ea, the
 * mm context is copied into the PACA again and the SLB is flushed and
 * rebolted.
 * Otherwise: if the vmalloc SLLP value held in the PACA differs from the
 * one for mmu_vmalloc_psize, it is refreshed and slb_vmalloc_update() is
 * called.
 */
static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
			     int psize, bool user_region)
{
	if (!user_region) {
		/* Only the vmalloc SLLP value kept in the PACA can be stale. */
		if (get_paca()->vmalloc_sllp ==
		    mmu_psize_defs[mmu_vmalloc_psize].sllp)
			return;

		get_paca()->vmalloc_sllp =
			mmu_psize_defs[mmu_vmalloc_psize].sllp;
		slb_vmalloc_update();
		return;
	}

	/* User address: nothing to do while the PACA agrees with @psize. */
	if (psize == get_paca_psize(ea))
		return;

	copy_mm_to_paca(&mm->context);
	slb_flush_and_rebolt();
}
/* Result code is:
 *  0 - handled
 *  1 - normal page fault
 * -1 - critical hash insertion error
 * -2 - access not permitted by subpage protection mechanism
 */
/*
 * hash_page_mm - handle a hash fault on @ea on behalf of @mm.
 *
 * @mm:     address space used to resolve the fault
 * @ea:     faulting effective address
 * @access: requested access bits; _PAGE_PRESENT is OR-ed in before the
 *          permission pre-check
 * @trap:   trap value, handed to the tracepoint and the __hash_page_*()
 *          helpers
 * @flags:  update flags; HPTE_LOCAL_UPDATE is added for a user-region
 *          address when mm_cpumask(mm) equals the mask of the current CPU
 *
 * Visible flow: pick page size, segment size and VSID from the region of
 * @ea, look up the Linux PTE, pre-check the access rights, optionally
 * demote the segment to 4K pages, then call the matching __hash_page_*()
 * helper.  Paths that "goto bail" leave through exception_exit().
 *
 * NOTE(review): this copy of the function looks truncated -- the opening
 * brace, several local declarations (psize, ssize, vsid, pgdir, ptep),
 * some case labels/conditions and a number of closing braces are not
 * present.  Confirm against the upstream file before building.
 */
int hash_page_mm(struct mm_struct *mm, unsigned long ea,
unsigned long access, unsigned long trap,
unsigned long flags)
bool is_thp;
/* The saved context state is handed back to exception_exit() at bail */
enum ctx_state prev_state = exception_enter();
unsigned hugeshift;
const struct cpumask *tmp;
int rc, user_region = 0;
DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n",
ea, access, trap);
trace_hash_fault(ea, access, trap);
/* Select psize/ssize/vsid according to the region @ea belongs to */
switch (REGION_ID(ea)) {
case USER_REGION_ID:
user_region = 1;
if (! mm) {
DBG_LOW(" user region with no mm !\n");
rc = 1;
goto bail;
psize = get_slice_psize(mm, ea);
ssize = user_segment_size(ea);
vsid = get_vsid(mm->context.id, ea, ssize);
/*
 * NOTE(review): a case label (presumably the vmalloc region) appears to
 * be missing before the kernel VSID lookup below -- TODO confirm.
 */
vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
if (ea < VMALLOC_END)
psize = mmu_vmalloc_psize;
else
psize = mmu_io_psize;
break;
default:
/* Not a valid range
* Send the problem up to do_page_fault
*/
rc = 1;
goto bail;
DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid);
/* Bad address. */
if (!vsid) {
DBG_LOW("Bad address!\n");
rc = 1;
goto bail;
/* No page directory: report a normal page fault (rc = 1) */
if (pgdir == NULL) {
rc = 1;
goto bail;
}
/* mm_cpumask(mm) names only the current CPU: request a local update */
tmp = cpumask_of(smp_processor_id());
if (user_region && cpumask_equal(mm_cpumask(mm), tmp))
flags |= HPTE_LOCAL_UPDATE;
#ifndef CONFIG_PPC_64K_PAGES
/* If we use 4K pages and our psize is not 4K, then we might
* be hitting a special driver mapping, and need to align the
* address before we fetch the PTE.
*
* It could also be a hugepage mapping, in which case this is
* not necessary, but it's not harmful, either.
*/
if (psize != MMU_PAGE_4K)
ea &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
#endif /* CONFIG_PPC_64K_PAGES */
/* Get PTE and page size from page tables */
ptep = __find_linux_pte_or_hugepte(pgdir, ea, &is_thp, &hugeshift);
if (ptep == NULL || !pte_present(*ptep)) {
DBG_LOW(" no PTE !\n");
rc = 1;
goto bail;
/* Add _PAGE_PRESENT to the required access perm */
access |= _PAGE_PRESENT;
/* Pre-check access permissions (will be re-checked atomically
* in __hash_page_XX but this pre-check is a fast path
*/
if (!check_pte_access(access, pte_val(*ptep))) {
DBG_LOW(" no access !\n");
rc = 1;
goto bail;
}
/*
 * Huge page handling: transparent huge pages go to __hash_page_thp(),
 * anything else to __hash_page_huge() when hugetlb is configured.
 * NOTE(review): the comment in the #else branch suggests this is only
 * reached when hugeshift is non-zero; the enclosing test is not visible
 * here -- TODO confirm.
 */
if (is_thp)
rc = __hash_page_thp(ea, access, vsid, (pmd_t *)ptep,
trap, flags, ssize, psize);
#ifdef CONFIG_HUGETLB_PAGE
else
rc = __hash_page_huge(ea, access, vsid, ptep, trap,
flags, ssize, hugeshift, psize);
#else
else {
/*
* if we have hugeshift, and is not transhuge with
* hugetlb disabled, something is really wrong.
*/
rc = 1;
WARN_ON(1);
}
#endif
/* Resync the PACA only when the fault is for the current mm */
if (current->mm == mm)
check_paca_psize(ea, mm, psize, user_region);
#ifndef CONFIG_PPC_64K_PAGES
DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep));
#else
DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep),
pte_val(*(ptep + PTRS_PER_PTE)));
#endif
/* Do actual hashing */
#ifdef CONFIG_PPC_64K_PAGES
/* If H_PAGE_4K_PFN is set, make sure this is a 4k segment */
if ((pte_val(*ptep) & H_PAGE_4K_PFN) && psize == MMU_PAGE_64K) {
demote_segment_4k(mm, ea);
psize = MMU_PAGE_4K;
}
/* If this PTE is non-cacheable and we have restrictions on
* using non cacheable large pages, then we switch to 4k
*/
if (mmu_ci_restrictions && psize == MMU_PAGE_64K && pte_ci(*ptep)) {
if (user_region) {
demote_segment_4k(mm, ea);
psize = MMU_PAGE_4K;
} else if (ea < VMALLOC_END) {
/*
* some driver did a non-cacheable mapping
* in vmalloc space, so switch vmalloc
* to 4k pages
*/
printk(KERN_ALERT "Reducing vmalloc segment "
"to 4kB pages because of "
"non-cacheable mapping\n");
psize = mmu_vmalloc_psize = MMU_PAGE_4K;
}
#endif /* CONFIG_PPC_64K_PAGES */
/* psize may have been demoted above, so check the PACA again */
if (current->mm == mm)
check_paca_psize(ea, mm, psize, user_region);
#ifdef CONFIG_PPC_64K_PAGES
if (psize == MMU_PAGE_64K)
rc = __hash_page_64K(ea, access, vsid, ptep, trap,
flags, ssize);
#endif /* CONFIG_PPC_64K_PAGES */
/*
 * 4K hashing: an access that touches bits removed by subpage protection
 * is refused with rc = -2, otherwise the PTE is hashed with the subpage
 * protection bits passed along.
 */
int spp = subpage_protection(mm, ea);
if (access & spp)
rc = -2;
else
rc = __hash_page_4K(ea, access, vsid, ptep, trap,
flags, ssize, spp);
/* Dump some info in case of hash insertion failure, they should
* never happen so it is really useful to know if/when they do
*/
if (rc == -1)
hash_failure_debug(ea, access, vsid, trap, ssize, psize,
psize, pte_val(*ptep));
#ifndef CONFIG_PPC_64K_PAGES
DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
#else
DBG_LOW(" o-pte: %016lx %016lx\n", pte_val(*ptep),
pte_val(*(ptep + PTRS_PER_PTE)));
#endif
DBG_LOW(" -> rc=%d\n", rc);
/* Common exit: undo exception_enter() */
bail:
exception_exit(prev_state);
int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
unsigned long dsisr)
unsigned long flags = 0;
struct mm_struct *mm = current->mm;
if (REGION_ID(ea) == VMALLOC_REGION_ID)
mm = &init_mm;
if (dsisr & DSISR_NOHPTE)
flags |= HPTE_NOHPTE_UPDATE;
return hash_page_mm(mm, ea, access, trap, flags);
int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
unsigned long dsisr)
{
unsigned long access = _PAGE_PRESENT | _PAGE_READ;
unsigned long flags = 0;
struct mm_struct *mm = current->mm;
if (REGION_ID(ea) == VMALLOC_REGION_ID)
mm = &init_mm;
if (dsisr & DSISR_NOHPTE)
flags |= HPTE_NOHPTE_UPDATE;
if (dsisr & DSISR_ISSTORE)
access |= _PAGE_WRITE;
/*
* We set _PAGE_PRIVILEGED only when
* kernel mode access kernel space.
*
* _PAGE_PRIVILEGED is NOT set
* 1) when kernel mode access user space
* 2) user space access kernel space.
*/
access |= _PAGE_PRIVILEGED;
if ((msr & MSR_PR) || (REGION_ID(ea) == USER_REGION_ID))
access &= ~_PAGE_PRIVILEGED;
if (trap == 0x400)
access |= _PAGE_EXEC;
return hash_page_mm(mm, ea, access, trap, flags);
}
#ifdef CONFIG_PPC_MM_SLICES
/*
 * Decide whether @ea in @mm may be hashed ahead of the first access.
 *
 * Preloading is refused when the slice page size differs from the mm's
 * user_psize, or when the slice uses 4K pages and subpage_protection()
 * reports a restriction for @ea.
 */
static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
{
	int slice_psize = get_slice_psize(mm, ea);
	bool refuse;

	/*
	 * Only standard pages are prefaulted for now, and never when
	 * subpage protection is enabled for the EA.  The second test is
	 * evaluated only when the first one does not already refuse.
	 */
	refuse = unlikely(slice_psize != mm->context.user_psize) ||
		 unlikely((slice_psize == MMU_PAGE_4K) &&
			  subpage_protection(mm, ea));

	return !refuse;
}
#else
/* Without MM slices every address is eligible for preloading. */
static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
{
	return true;
}
#endif
/*
 * hash_preload - insert a hash PTE for a user address ahead of the fault.
 *
 * @mm:     address space owning @ea
 * @ea:     effective address; must be in the user region (BUG_ON otherwise)
 * @access: access bits handed to the __hash_page_*() helper
 * @trap:   trap value handed to the __hash_page_*() helper
 *
 * Returns early when should_hash_preload() refuses, when the mm has no
 * page directory, or when no VSID can be computed.  The PTE lookup and
 * the hash insertion run with interrupts disabled.
 *
 * NOTE(review): this copy of the function looks truncated -- the opening
 * brace, the declarations of pgdir and vsid, the test guarding the first
 * "goto out_exit", the "out_exit" label and an "else" between the 64K and
 * 4K insertion calls are not present.  Confirm against upstream.
 */
void hash_preload(struct mm_struct *mm, unsigned long ea,
unsigned long access, unsigned long trap)
int hugepage_shift;
pte_t *ptep;
unsigned long flags;
int rc, ssize, update_flags = 0;
BUG_ON(REGION_ID(ea) != USER_REGION_ID);
if (!should_hash_preload(mm, ea))
return;
DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx,"
" trap=%lx\n", mm, mm->pgd, ea, access, trap);
/* Get Linux PTE if available */
pgdir = mm->pgd;
if (pgdir == NULL)
return;
/* Get VSID */
ssize = user_segment_size(ea);
vsid = get_vsid(mm->context.id, ea, ssize);
if (!vsid)
return;
/*
* Hash doesn't like irqs. Walking linux page table with irq disabled
* saves us from holding multiple locks.
*/
local_irq_save(flags);
/*
* THP pages use update_mmu_cache_pmd. We don't do
* hash preload there. Hence can ignore THP here
*/
ptep = find_linux_pte_or_hugepte(pgdir, ea, NULL, &hugepage_shift);
/* NOTE(review): as written this jump is unconditional -- presumably a
 * "no PTE found" test preceded it; TODO confirm.
 */
goto out_exit;
/* A huge page shift is not expected on this path */
WARN_ON(hugepage_shift);
#ifdef CONFIG_PPC_64K_PAGES
/* If either H_PAGE_4K_PFN or cache inhibited is set (and we are on
* a 64K kernel), then we don't preload, hash_page() will take
* care of it once we actually try to access the page.
* That way we don't have to duplicate all of the logic for segment
* page size demotion here
*/
if ((pte_val(*ptep) & H_PAGE_4K_PFN) || pte_ci(*ptep))
goto out_exit;
#endif /* CONFIG_PPC_64K_PAGES */
/* Is that local to this CPU ? */
if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
update_flags |= HPTE_LOCAL_UPDATE;
/* Hash it in */
#ifdef CONFIG_PPC_64K_PAGES
if (mm->context.user_psize == MMU_PAGE_64K)
rc = __hash_page_64K(ea, access, vsid, ptep, trap,
update_flags, ssize);
#endif /* CONFIG_PPC_64K_PAGES */
rc = __hash_page_4K(ea, access, vsid, ptep, trap, update_flags,
ssize, subpage_protection(mm, ea));
/* Dump some info in case of hash insertion failure, they should
* never happen so it is really useful to know if/when they do
*/
if (rc == -1)
hash_failure_debug(ea, access, vsid, trap, ssize,
mm->context.user_psize,
mm->context.user_psize,
pte_val(*ptep));
/* Re-enable interrupts saved by local_irq_save() */
local_irq_restore(flags);
}
/* WARNING: This is called from hash_low_64.S, if you change this prototype,
 *          do not forget to update the assembly call site !
 *
 * Invalidate every hashed sub-page of @pte.
 *
 * @vpn:   virtual page number of the mapping
 * @pte:   real PTE carrying the per-sub-page hash index bits
 * @psize: page size index, used as both base and actual page size
 * @ssize: segment size index
 * @flags: HPTE_LOCAL_UPDATE selects a local invalidate
 */
void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
		     unsigned long flags)
{
	unsigned long hash, index, shift, hidx, slot;
	int local = flags & HPTE_LOCAL_UPDATE;

	DBG_LOW("flush_hash_page(vpn=%016lx)\n", vpn);
	pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
		hash = hpt_hash(vpn, shift, ssize);
		hidx = __rpte_to_hidx(pte, index);
		/* Entry lives in the secondary group: use the inverted hash */
		if (hidx & _PTEIDX_SECONDARY)
			hash = ~hash;
		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
		slot += hidx & _PTEIDX_GROUP_IX;
		DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx);
		/*
		 * We use same base page size and actual psize, because we don't
		 * use these functions for hugepage
		 */
		ppc_md.hpte_invalidate(slot, vpn, psize, psize, ssize, local);
	} pte_iterate_hashed_end();

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	/* Transactions are not aborted by tlbiel, only tlbie.
	 * Without, syncing a page back to a block device w/ PIO could pick up
	 * transactional data (bad!) so we force an abort here.  Before the
	 * sync the page will be made read-only, which will flush_hash_page.
	 * BIG ISSUE here: if the kernel uses a page from userspace without
	 * unmapping it first, it may see the speculated version.
	 */
	if (local && cpu_has_feature(CPU_FTR_TM) &&
	    current->thread.regs &&
	    MSR_TM_ACTIVE(current->thread.regs->msr)) {
		tm_enable();
		tm_abort(TM_CAUSE_TLBI);
	}
#endif
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * Invalidate all hash PTEs backing the transparent huge page at @addr.
 *
 * @vsid:  VSID of the segment
 * @addr:  address inside the huge page; aligned down with HPAGE_PMD_MASK
 * @pmdp:  PMD entry whose slot array records the inserted HPTEs
 * @psize: base page size index of the individual HPTEs
 * @ssize: segment size index
 * @flags: HPTE_LOCAL_UPDATE selects a local invalidate
 *
 * Uses ppc_md.hugepage_invalidate when the platform provides it, otherwise
 * walks the slot array and invalidates each valid entry.
 */
void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
			 pmd_t *pmdp, unsigned int psize, int ssize,
			 unsigned long flags)
{
	int i, max_hpte_count, valid;
	unsigned long s_addr;
	unsigned char *hpte_slot_array;
	unsigned long hidx, shift, vpn, hash, slot;
	int local = flags & HPTE_LOCAL_UPDATE;

	s_addr = addr & HPAGE_PMD_MASK;
	hpte_slot_array = get_hpte_slot_array(pmdp);
	/*
	 * IF we try to do a HUGE PTE update after a withdraw is done.
	 * we will find the below NULL. This happens when we do
	 * split_huge_page_pmd
	 */
	if (!hpte_slot_array)
		return;

	if (ppc_md.hugepage_invalidate) {
		ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array,
					   psize, ssize, local);
		goto tm_abort;
	}
	/*
	 * No bulk hpte removal support, invalidate each entry
	 */
	shift = mmu_psize_defs[psize].shift;
	max_hpte_count = HPAGE_PMD_SIZE >> shift;
	for (i = 0; i < max_hpte_count; i++) {
		/*
		 * 8 bits per each hpte entries
		 * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
		 */
		valid = hpte_valid(hpte_slot_array, i);
		if (!valid)
			continue;
		hidx = hpte_hash_index(hpte_slot_array, i);

		/* get the vpn */
		addr = s_addr + (i * (1ul << shift));
		vpn = hpt_vpn(addr, vsid, ssize);
		hash = hpt_hash(vpn, shift, ssize);
		/* Entry lives in the secondary group: use the inverted hash */
		if (hidx & _PTEIDX_SECONDARY)
			hash = ~hash;

		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
		slot += hidx & _PTEIDX_GROUP_IX;
		ppc_md.hpte_invalidate(slot, vpn, psize,
				       MMU_PAGE_16M, ssize, local);
	}
tm_abort:
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	/* Transactions are not aborted by tlbiel, only tlbie.
	 * Without, syncing a page back to a block device w/ PIO could pick up
	 * transactional data (bad!) so we force an abort here.  Before the
	 * sync the page will be made read-only, which will flush_hash_page.
	 * BIG ISSUE here: if the kernel uses a page from userspace without
	 * unmapping it first, it may see the speculated version.
	 */
	if (local && cpu_has_feature(CPU_FTR_TM) &&
	    current->thread.regs &&
	    MSR_TM_ACTIVE(current->thread.regs->msr)) {
		tm_enable();
		tm_abort(TM_CAUSE_TLBI);
	}
#endif
	/* Keeps the tm_abort label followed by a statement in every config */
	return;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
 * flush_hash_range - flush @number hash entries.
 *
 * The visible code calls ppc_md.flush_hash_range(@number, @local) and
 * also calls flush_hash_page() on vpn/pte entries of the ppc64_tlb_batch
 * obtained through this_cpu_ptr().
 *
 * NOTE(review): this copy of the function looks truncated -- the opening
 * brace, whatever selects between the two paths, the loop over the batch
 * and the tail of the flush_hash_page() argument list are not present.
 * Confirm against upstream.
 */
void flush_hash_range(unsigned long number, int local)
ppc_md.flush_hash_range(number, local);
struct ppc64_tlb_batch *batch =
this_cpu_ptr(&ppc64_tlb_batch);
flush_hash_page(batch->vpn[i], batch->pte[i],
}
}
/*
* low_hash_fault is called when the low level hash code failed
* to insert a PTE due to a hypervisor error
*
* @regs: register state of the faulting context
* @address: faulting address, reported with the signal
* @rc: failure code; -2 is turned into SIGSEGV/SEGV_ACCERR when
* CONFIG_PPC_SUBPAGE_PROT is set, anything else into SIGBUS/BUS_ADRERR
*
* The whole body is bracketed by exception_enter()/exception_exit().
*
* NOTE(review): this copy of the function looks truncated -- the opening
* brace, the condition that pairs with "} else" (presumably a user-mode
* test, since the other branch calls bad_page_fault()) and the closing
* brace are not present. Confirm against upstream.
*/
void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc)
enum ctx_state prev_state = exception_enter();
#ifdef CONFIG_PPC_SUBPAGE_PROT
/* -2 means the access was refused by subpage protection */
if (rc == -2)
_exception(SIGSEGV, regs, SEGV_ACCERR, address);
else
#endif
_exception(SIGBUS, regs, BUS_ADRERR, address);
} else
bad_page_fault(regs, address, SIGBUS);
exception_exit(prev_state);
/*
 * hpte_insert_repeating - insert an HPTE, evicting entries until it fits.
 *
 * The primary group selected by @hash is tried first, then the secondary
 * group (inverted hash, HPTE_V_SECONDARY added to @vflags).  When both
 * report -1, ppc_md.hpte_remove() is called on one of the two groups and
 * the whole sequence is retried.  @psize is passed as both page size
 * arguments of ppc_md.hpte_insert().
 *
 * Returns the slot value reported by the successful ppc_md.hpte_insert().
 */
long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
			   unsigned long pa, unsigned long rflags,
			   unsigned long vflags, int psize, int ssize)
{
	unsigned long group;
	long slot;

	for (;;) {
		/* Insert into the hash table, primary slot */
		group = ((hash & htab_hash_mask) *
			 HPTES_PER_GROUP) & ~0x7UL;
		slot = ppc_md.hpte_insert(group, vpn, pa, rflags, vflags,
					  psize, psize, ssize);

		/* Primary is full, try the secondary */
		if (unlikely(slot == -1)) {
			group = ((~hash & htab_hash_mask) *
				 HPTES_PER_GROUP) & ~0x7UL;
			slot = ppc_md.hpte_insert(group, vpn, pa, rflags,
						  vflags | HPTE_V_SECONDARY,
						  psize, psize, ssize);
		}

		if (slot != -1)
			return slot;

		/*
		 * Both groups are full.  The low timebase bit picks the
		 * group to evict from: primary when set, otherwise the
		 * secondary group that was just tried.
		 */
		if (mftb() & 0x1)
			group = ((hash & htab_hash_mask) *
				 HPTES_PER_GROUP) & ~0x7UL;

		ppc_md.hpte_remove(group);
	}
}
#ifdef CONFIG_DEBUG_PAGEALLOC
/*
 * Insert a bolted HPTE for the linear-mapping page at @vaddr and record
 * the returned slot in linear_map_hash_slots[@lmi].
 *
 * Nothing is inserted when no kernel VSID exists for @vaddr.  A recorded
 * slot has bit 0x80 set; finding that bit already set is a BUG.
 */
static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
{
	unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
	unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
	unsigned long rflags = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL));
	unsigned long hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
	long slot;

	/* Don't create HPTE entries for bad address */
	if (!vsid)
		return;

	slot = hpte_insert_repeating(hash, vpn, __pa(vaddr), rflags,
				     HPTE_V_BOLTED,
				     mmu_linear_psize, mmu_kernel_ssize);
	BUG_ON(slot < 0);

	/* Publish the slot under the lock; 0x80 flags the entry as mapped */
	spin_lock(&linear_map_hash_lock);
	BUG_ON(linear_map_hash_slots[lmi] & 0x80);
	linear_map_hash_slots[lmi] = slot | 0x80;
	spin_unlock(&linear_map_hash_lock);
}
static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
{
unsigned long hash, hidx, slot;
unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
spin_lock(&linear_map_hash_lock);
BUG_ON(!(linear_map_hash_slots[lmi] & 0x80));
hidx = linear_map_hash_slots[lmi] & 0x7f;
linear_map_hash_slots[lmi] = 0;
spin_unlock(&linear_map_hash_lock);
if (hidx & _PTEIDX_SECONDARY)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += hidx & _PTEIDX_GROUP_IX;
Aneesh Kumar K.V
committed
ppc_md.hpte_invalidate(slot, vpn, mmu_linear_psize, mmu_linear_psize,
mmu_kernel_ssize, 0);
void __kernel_map_pages(struct page *page, int numpages, int enable)
{
unsigned long flags, vaddr, lmi;
int i;
local_irq_save(flags);
for (i = 0; i < numpages; i++, page++) {
vaddr = (unsigned long)page_address(page);
lmi = __pa(vaddr) >> PAGE_SHIFT;
if (lmi >= linear_map_hash_count)
continue;
if (enable)
kernel_map_linear_page(vaddr, lmi);
else
kernel_unmap_linear_page(vaddr, lmi);
}
local_irq_restore(flags);
}
#endif /* CONFIG_DEBUG_PAGEALLOC */
Benjamin Herrenschmidt
committed
void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
Benjamin Herrenschmidt
committed
phys_addr_t first_memblock_size)
{
/* We don't currently support the first MEMBLOCK not mapping 0
* physical on those processors
*/
BUG_ON(first_memblock_base != 0);
/* On LPAR systems, the first entry is our RMA region,
* non-LPAR 64-bit hash MMU systems don't have a limitation
* on real mode access, but using the first entry works well
* enough. We also clamp it to 1G to avoid some funky things
* such as RTAS bugs etc...
*/
ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
/* Finally limit subsequent allocations */
memblock_set_current_limit(ppc64_rma_size);
}