Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
/*
* linux/mm/percpu.c - percpu memory allocator
*
* Copyright (C) 2009 SUSE Linux Products GmbH
* Copyright (C) 2009 Tejun Heo <tj@kernel.org>
*
* This file is released under the GPLv2.
*
* This is percpu allocator which can handle both static and dynamic
* areas. Percpu areas are allocated in chunks in vmalloc area. Each
* chunk consists of num_possible_cpus() units and the first chunk
* is used for static percpu variables in the kernel image (special
* boot time alloc/init handling necessary as these areas need to be
* brought up before allocation services are running). Unit grows as
* necessary and all units grow or shrink in unison. When a chunk is
* filled up, another chunk is allocated. I.e., in the vmalloc area:
*
*  c0                           c1                         c2
*  -------------------          -------------------        ------------
* | u0 | u1 | u2 | u3 |        | u0 | u1 | u2 | u3 |      | u0 | u1 | u
*  -------------------  ......  -------------------  ....  ------------
*
* Allocation is done in offset-size areas of single unit space. Ie,
* an area of 512 bytes at 6k in c1 occupies 512 bytes at 6k of c1:u0,
* c1:u1, c1:u2 and c1:u3. Percpu access can be done by configuring
* percpu base registers UNIT_SIZE apart.
*
* There are usually many small percpu allocations, many of them as
* small as 4 bytes. The allocator organizes chunks into lists
* according to free size and tries to allocate from the fullest one.
* Each chunk keeps the maximum contiguous area size hint which is
* guaranteed to be equal to or larger than the maximum contiguous
* area in the chunk. This helps the allocator not to iterate the
* chunk maps unnecessarily.
*
* Allocation state in each chunk is kept using an array of integers
* on chunk->map. A positive value in the map represents a free
* region and negative allocated. Allocation inside a chunk is done
* by scanning this map sequentially and serving the first matching
* entry. This is mostly copied from the percpu_modalloc() allocator.
* Chunks are also linked into a rb tree to ease address to chunk
* mapping during free.
*
* To use this allocator, arch code should do the following:
*
* - define CONFIG_HAVE_DYNAMIC_PER_CPU_AREA
*
* - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate
* regular address to percpu pointer and back
*
* - use pcpu_setup_first_chunk() during percpu area initialization to
* setup the first chunk containing the kernel static percpu area
*/
#include <linux/bitmap.h>
#include <linux/bootmem.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/pfn.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
/*
 * Tunables for the chunk bookkeeping.
 *
 * PCPU_SLOT_BASE_SHIFT is subtracted from the high-bit value in
 * __pcpu_size_to_slot() when mapping a size to a slot index.
 * PCPU_DFL_MAP_ALLOC is not referenced in the visible part of this
 * file; per its comment it is the initial number of map entries.
 */
#define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */
#define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */
/*
 * Per-chunk bookkeeping.
 *
 * A chunk is linked both into one of the pcpu_slot lists (via @list)
 * and into the address-keyed rb tree (via @rb_node).  The allocation
 * state lives in @map; the backing pages are tracked through @page,
 * which is indexed by pcpu_page_idx(cpu, page_idx).
 *
 * @page_ar is a flexible array member and therefore must stay the last
 * field of the structure.
 */
struct pcpu_chunk {
	struct list_head	list;		/* linked to pcpu_slot lists */
	struct rb_node		rb_node;	/* key is chunk->vm->addr */
	int			free_size;	/* free bytes in the chunk */
	int			contig_hint;	/* max contiguous size hint */
	struct vm_struct	*vm;		/* mapped vmalloc region */
	int			map_used;	/* # of map entries used */
	int			map_alloc;	/* # of map entries allocated */
	int			*map;		/* allocation map */
	bool			immutable;	/* no [de]population allowed */
	struct page		**page;		/* points to page array */
	struct page		*page_ar[];	/* #cpus * UNIT_PAGES */
};
/*
 * Allocator geometry.  None of these are assigned in the visible part
 * of this file; they are only read by the helpers below.
 */
/* per-cpu stride, in pages, used by pcpu_page_idx() */
static int pcpu_unit_pages __read_mostly;
/* a size equal to this maps to the last slot, see pcpu_size_to_slot() */
static int pcpu_unit_size __read_mostly;
/* NOTE(review): presumably the size of a whole chunk - confirm */
static int pcpu_chunk_size __read_mostly;
/* number of slots; index pcpu_nr_slots - 1 is the last one */
static int pcpu_nr_slots __read_mostly;
/* NOTE(review): presumably sizeof(struct pcpu_chunk) plus page_ar[] - confirm */
static size_t pcpu_chunk_struct_size __read_mostly;
/* the address of the first chunk which starts with the kernel static area */
void *pcpu_base_addr __read_mostly;
EXPORT_SYMBOL_GPL(pcpu_base_addr);
Tejun Heo
committed
/*
 * Reserved chunk state.  Neither variable is assigned or read in the
 * visible part of this file.
 */
/* optional reserved chunk, only accessible for reserved allocations */
static struct pcpu_chunk *pcpu_reserved_chunk;
/* offset limit of the reserved chunk */
static int pcpu_reserved_chunk_limit;
/*
* One mutex to rule them all.
*
* The following mutex is grabbed in the outermost public alloc/free
* interface functions and released only when the operation is
* complete. As such, every function in this file other than the
* outermost functions are called under pcpu_mutex.
*
* It can easily be switched to use spinlock such that only the area
* allocation and page population commit are protected with it doing
* actual [de]allocation without holding any lock. However, given
* what this allocator does, I think it's better to let them run
* sequentially.
*/
static DEFINE_MUTEX(pcpu_mutex);
/*
 * Chunk lookup structures: chunks are kept on lists according to
 * their free size (pcpu_slot) and in an rb tree used for address to
 * chunk mapping during free (pcpu_addr_root).
 */
static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */
static struct rb_root pcpu_addr_root = RB_ROOT; /* chunks by address */
static int __pcpu_size_to_slot(int size)
return max(highbit - PCPU_SLOT_BASE_SHIFT + 2, 1);
}
/*
 * pcpu_size_to_slot - map a size to a slot index
 * @size: size of interest
 *
 * A size equal to pcpu_unit_size always maps to the last slot; every
 * other size is delegated to __pcpu_size_to_slot().
 */
static int pcpu_size_to_slot(int size)
{
	if (size != pcpu_unit_size)
		return __pcpu_size_to_slot(size);

	return pcpu_nr_slots - 1;
}
/*
 * pcpu_chunk_slot - slot index a chunk belongs to
 * @chunk: chunk of interest
 *
 * Chunks whose free size or contiguous hint is smaller than one int
 * go to slot 0; all others are slotted by their free size.
 *
 * RETURNS:
 * 0 for chunks with less than sizeof(int) usable space, otherwise
 * pcpu_size_to_slot(chunk->free_size).
 */
static int pcpu_chunk_slot(const struct pcpu_chunk *chunk)
{
	/*
	 * Compare as signed ints.  Comparing the int fields directly
	 * against sizeof() would promote them to unsigned, so a
	 * negative value would slip past the check and be handed to
	 * pcpu_size_to_slot().
	 */
	const int min_area = (int)sizeof(int);

	if (chunk->free_size < min_area || chunk->contig_hint < min_area)
		return 0;

	return pcpu_size_to_slot(chunk->free_size);
}
/*
 * pcpu_page_idx - index into a chunk's page array
 * @cpu: cpu whose unit is addressed
 * @page_idx: page index inside that unit
 *
 * Units are laid out back to back, pcpu_unit_pages pages each.
 */
static int pcpu_page_idx(unsigned int cpu, int page_idx)
{
	/* kept unsigned so the arithmetic matches a single expression */
	unsigned int unit_start = cpu * pcpu_unit_pages;

	return unit_start + page_idx;
}
/*
 * pcpu_chunk_pagep - locate the page pointer slot for (cpu, page_idx)
 * @chunk: chunk of interest
 * @cpu: cpu whose unit is addressed
 * @page_idx: page index inside that unit
 *
 * RETURNS:
 * Pointer to the entry in chunk->page for the requested page.
 */
static struct page **pcpu_chunk_pagep(struct pcpu_chunk *chunk,
				      unsigned int cpu, int page_idx)
{
	const int slot = pcpu_page_idx(cpu, page_idx);

	return chunk->page + slot;
}
/*
 * pcpu_chunk_addr - address of a page inside a chunk
 * @chunk: chunk of interest
 * @cpu: cpu whose unit is addressed
 * @page_idx: page index inside that unit
 *
 * RETURNS:
 * chunk->vm->addr plus the byte offset of the requested page.
 */
static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk,
				     unsigned int cpu, int page_idx)
{
	/*
	 * Widen before shifting: shifting the int page index by
	 * PAGE_SHIFT can overflow int for large chunks.
	 */
	unsigned long idx = pcpu_page_idx(cpu, page_idx);

	return (unsigned long)chunk->vm->addr + (idx << PAGE_SHIFT);
}
static bool pcpu_chunk_page_occupied(struct pcpu_chunk *chunk,
int page_idx)
{
return *pcpu_chunk_pagep(chunk, 0, page_idx) != NULL;
}
/**
 * pcpu_realloc - versatile realloc
 * @p: the current pointer (can be NULL for new allocations)
 * @size: the current size in bytes (can be 0 for new allocations)
 * @new_size: the wanted new size in bytes (can be 0 for free)
 *
 * More robust realloc which can be used to allocate, resize or free a
 * memory area of arbitrary size.  If the needed size goes over
 * PAGE_SIZE, kernel VM is used.
 *
 * The first min(@size, @new_size) bytes are carried over from @p and
 * any newly gained tail is zeroed.  On success the old area is
 * released; on failure @p is left untouched and remains valid.
 *
 * RETURNS:
 * The new pointer on success, NULL on failure.
 */
static void *pcpu_realloc(void *p, size_t size, size_t new_size)
{
	size_t copy_size = min(size, new_size);
	void *new;

	if (new_size <= PAGE_SIZE)
		new = kmalloc(new_size, GFP_KERNEL);
	else
		new = vmalloc(new_size);
	if (new_size && !new)
		return NULL;

	/*
	 * Skip the copy when there is nothing to carry over: @p may be
	 * NULL for a fresh allocation and @new may not be
	 * dereferenceable when @new_size is 0.
	 */
	if (copy_size)
		memcpy(new, p, copy_size);
	if (new_size > size)
		memset(new + size, 0, new_size - size);

	/* release the old area with the allocator that provided it */
	if (size <= PAGE_SIZE)
		kfree(p);
	else
		vfree(p);

	return new;
}
Loading
Loading full blame...