	/* allocate pointer array and alloc large pages */
	map_size = pcpul_nr_lpages * sizeof(pcpul_map[0]);
	pcpul_map = alloc_bootmem(map_size);

	/* allocate all pages */
	for (i = 0; i < pcpul_nr_lpages; i++) {
		size_t offset = i * lpage_size;
		int first_unit = offset / ai->unit_size;
		int last_unit = (offset + lpage_size - 1) / ai->unit_size;
		void *ptr;

		/* find out which cpu is mapped to this unit */
		for (unit = first_unit; unit <= last_unit; unit++)
			if (pcpul_unit_to_cpu(unit, ai, &cpu))
				goto found;
		continue;
found:
		ptr = alloc_fn(cpu, lpage_size, lpage_size);
		if (!ptr) {
			pr_warning("PERCPU: failed to allocate large page "
				   "for cpu%u\n", cpu);
			goto enomem;
		}

		pcpul_map[i].ptr = ptr;
	}
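	/*
	 * Worked example (illustrative numbers, not from this file): with a
	 * 2MB large page and a 1MB unit_size, lpage i covers offsets
	 * [i*2MB, (i+1)*2MB), so first_unit = 2*i and last_unit = 2*i + 1.
	 * The lpage is allocated (local to @cpu) as soon as any unit in that
	 * range is mapped to a possible CPU; otherwise it is skipped and its
	 * pcpul_map slot stays NULL.
	 */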
	/* return unused holes */
	for (unit = 0; unit < nr_units; unit++) {
		size_t start = unit * ai->unit_size;
		size_t end = start + ai->unit_size;
		size_t off, next;

		/* don't free used part of occupied unit */
		if (pcpul_unit_to_cpu(unit, ai, NULL))
			start += pcpul_size;

		/* unit can span more than one page, punch the holes */
		for (off = start; off < end; off = next) {
			void *ptr = pcpul_map[off / lpage_size].ptr;

			next = min(roundup(off + 1, lpage_size), end);
			if (ptr)
				free_fn(ptr + off % lpage_size, next - off);
		}
	}
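	/*
	 * Worked example (illustrative numbers): with unit_size = 1MB,
	 * lpage_size = 2MB and pcpul_size = 768KB, an occupied unit keeps
	 * [start, start + 768KB) and the loop hands the remaining 256KB back
	 * via free_fn(); a unit with no CPU mapped to it is returned in full.
	 * next snaps to the next 2MB boundary (capped at end), so a single
	 * free_fn() call never straddles two different large pages.
	 */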
	/* allocate address, map and copy */
	vm.flags = VM_ALLOC;
	vm.size = chunk_size;
	vm_area_register_early(&vm, ai->unit_size);

	/* map each allocated large page into the chunk's virtual area */
	for (i = 0; i < pcpul_nr_lpages; i++) {
		if (!pcpul_map[i].ptr)
			continue;
		pcpul_map[i].map_addr = vm.addr + i * lpage_size;
		map_fn(pcpul_map[i].ptr, lpage_size, pcpul_map[i].map_addr);
	}
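	/*
	 * Note: map_addr is simply vm.addr + i * lpage_size, so the chunk is
	 * contiguous in the vmalloc address space even though the backing
	 * large pages were allocated per-CPU and may be physically scattered.
	 */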
	/* copy static data into each CPU's unit */
	for_each_possible_cpu(cpu)
		memcpy(vm.addr + pcpul_cpu_to_unit(cpu, ai) * ai->unit_size,
		       __per_cpu_load, ai->static_size);

	/* we're ready, commit */
	pr_info("PERCPU: large pages @%p s%zu r%zu d%zu u%zu\n",
		vm.addr, ai->static_size, ai->reserved_size, ai->dyn_size,
		ai->unit_size);
	rc = pcpu_setup_first_chunk(ai, vm.addr);

	/*
	 * Sort pcpul_map array for pcpu_lpage_remapped().  Unmapped
	 * lpages are pushed to the end and trimmed.
	 */
	for (i = 0; i < pcpul_nr_lpages - 1; i++)
		for (j = i + 1; j < pcpul_nr_lpages; j++) {
			struct pcpul_ent tmp;

			if (!pcpul_map[j].ptr)
				continue;
			if (pcpul_map[i].ptr &&
			    pcpul_map[i].ptr < pcpul_map[j].ptr)
				continue;

			tmp = pcpul_map[i];
			pcpul_map[i] = pcpul_map[j];
			pcpul_map[j] = tmp;
		}

	while (pcpul_nr_lpages && !pcpul_map[pcpul_nr_lpages - 1].ptr)
		pcpul_nr_lpages--;
	return rc;

enomem:
	for (i = 0; i < pcpul_nr_lpages; i++)
		if (pcpul_map[i].ptr)
			free_fn(pcpul_map[i].ptr, lpage_size);
	free_bootmem(__pa(pcpul_map), map_size);
	return -ENOMEM;
}
/**
* pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area
* @kaddr: the kernel address in question
*
* Determine whether @kaddr falls in the pcpul recycled area. This is
* used by pageattr to detect VM aliases and break up the pcpu large
* page mapping such that the same physical page is not mapped under
* different attributes.
*
* The recycled area is always at the tail of a partially used large
* page.
*
* RETURNS:
* Address of corresponding remapped pcpu address if match is found;
* otherwise, NULL.
*/
void *pcpu_lpage_remapped(void *kaddr)
{
	unsigned long lpage_mask = pcpul_lpage_size - 1;
	void *lpage_addr = (void *)((unsigned long)kaddr & ~lpage_mask);
	unsigned long offset = (unsigned long)kaddr & lpage_mask;
	int left = 0, right = pcpul_nr_lpages - 1;
	int pos;

	/* pcpul in use at all? */
	if (!pcpul_map)
		return NULL;

	/* okay, perform binary search */
	while (left <= right) {
		pos = (left + right) / 2;
		if (pcpul_map[pos].ptr < lpage_addr)
			left = pos + 1;
		else if (pcpul_map[pos].ptr > lpage_addr)
			right = pos - 1;
		else
			return pcpul_map[pos].map_addr + offset;
	}

	return NULL;
}
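/*
 * Illustrative sketch, not part of the original file: a pageattr-style
 * caller about to change the attributes of the page backing @kaddr could
 * use pcpu_lpage_remapped() roughly as below to find the percpu alias of
 * the same physical page.  The binary search works because pcpul_map was
 * sorted and trimmed at init time above.  The function name and the
 * surrounding flow are hypothetical; only the pcpu_lpage_remapped() call
 * itself is real.
 */
#if 0
static void example_check_percpu_alias(void *kaddr)
{
	void *pcpu_addr = pcpu_lpage_remapped(kaddr);	/* NULL if no alias */

	if (pcpu_addr) {
		/*
		 * The same physical page is also visible at pcpu_addr via
		 * the percpu large page mapping; the caller must apply the
		 * same attribute change (or split the large page) there too.
		 */
	}
}
#endif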
#endif /* CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK */
/*
* Generic percpu area setup.
*
* The embedding helper is used because its behavior closely resembles
* the original non-dynamic generic percpu area setup. This is
* important because many archs have addressing restrictions and might
* fail if the percpu area is located far away from the previous
* location. As an added bonus, in non-NUMA cases, embedding is
* generally a good idea TLB-wise because percpu area can piggy back
* on the physical linear memory mapping which uses large page
* mappings on applicable archs.
*/
#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
void __init setup_per_cpu_areas(void)
{
	unsigned long delta;
	unsigned int cpu;
	int rc;

	/*
	 * Always reserve area for module percpu variables.  That's
	 * what the legacy allocator did.
	 */
	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
				    PERCPU_DYNAMIC_RESERVE);
	if (rc < 0)
		panic("Failed to initialize percpu areas.");

	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
	for_each_possible_cpu(cpu)
		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
}
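/*
 * Note (illustrative, not part of the original file): once the offsets are
 * set up, reaching a static percpu variable for a given CPU is plain
 * pointer arithmetic, roughly &var + __per_cpu_offset[cpu]: delta rebases
 * the linker-placed static percpu section onto the first chunk, and
 * pcpu_unit_offsets[cpu] selects that CPU's unit within it.
 */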
#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */