// SPDX-License-Identifier: GPL-2.0
/*
* KFENCE guarded object allocator and fault handling.
*
* Copyright (C) 2020, Google LLC.
*/
#define pr_fmt(fmt) "kfence: " fmt
#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/debugfs.h>
#include <linux/hash.h>
#include <linux/jhash.h>
#include <linux/kcsan-checks.h>
#include <linux/kfence.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/log2.h>
#include <linux/memblock.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/sched/clock.h>
#include <linux/sched/sysctl.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <asm/kfence.h>
#include "kfence.h"
/* Disables KFENCE on the first warning assuming an irrecoverable error. */
#define KFENCE_WARN_ON(cond) \
	({ \
		const bool __cond = WARN_ON(cond); \
		if (unlikely(__cond)) { \
			WRITE_ONCE(kfence_enabled, false); \
			disabled_by_warn = true; \
		} \
		__cond; \
	})
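/*
 * Typical use (illustrative; "some_invariant" is a placeholder, not a real
 * symbol): internal consistency checks are written as
 *
 *	if (KFENCE_WARN_ON(!some_invariant))
 *		return;
 *
 * so that a single violated invariant permanently disables KFENCE instead of
 * risking further, harder-to-debug corruption.
 */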
/* === Data ================================================================= */
static bool kfence_enabled __read_mostly;
static bool disabled_by_warn __read_mostly;
unsigned long kfence_sample_interval __read_mostly = CONFIG_KFENCE_SAMPLE_INTERVAL;
EXPORT_SYMBOL_GPL(kfence_sample_interval); /* Export for test modules. */
#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
#endif
#define MODULE_PARAM_PREFIX "kfence."
static int kfence_enable_late(void);
static int param_set_sample_interval(const char *val, const struct kernel_param *kp)
{
	unsigned long num;
	int ret = kstrtoul(val, 0, &num);

	if (ret < 0)
		return ret;

	if (!num) /* Using 0 to indicate KFENCE is disabled. */
		WRITE_ONCE(kfence_enabled, false);

	*((unsigned long *)kp->arg) = num;

	if (num && !READ_ONCE(kfence_enabled) && system_state != SYSTEM_BOOTING)
		return disabled_by_warn ? -EINVAL : kfence_enable_late();
	return 0;
}
static int param_get_sample_interval(char *buffer, const struct kernel_param *kp)
{
	if (!READ_ONCE(kfence_enabled))
		return sprintf(buffer, "0\n");

	return param_get_ulong(buffer, kp);
}
static const struct kernel_param_ops sample_interval_param_ops = {
	.set = param_set_sample_interval,
	.get = param_get_sample_interval,
};
module_param_cb(sample_interval, &sample_interval_param_ops, &kfence_sample_interval, 0600);
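/*
 * Example: the sample interval can be given on the kernel command line, e.g.
 * "kfence.sample_interval=100" (milliseconds), or changed at runtime via
 * /sys/module/kfence/parameters/sample_interval. Writing 0 disables KFENCE;
 * writing a non-zero value after boot attempts to re-enable it through
 * kfence_enable_late(), unless KFENCE was previously disabled by a warning.
 */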
/* Pool usage% threshold when currently covered allocations are skipped. */
static unsigned long kfence_skip_covered_thresh __read_mostly = 75;
module_param_named(skip_covered_thresh, kfence_skip_covered_thresh, ulong, 0644);
/* If true, use a deferrable timer. */
static bool kfence_deferrable __read_mostly = IS_ENABLED(CONFIG_KFENCE_DEFERRABLE);
module_param_named(deferrable, kfence_deferrable, bool, 0444);
/* The pool of pages used for guard pages and objects. */
char *__kfence_pool __read_mostly;
EXPORT_SYMBOL(__kfence_pool); /* Export for test modules. */
/*
* Per-object metadata, with one-to-one mapping of object metadata to
* backing pages (in __kfence_pool).
*/
static_assert(CONFIG_KFENCE_NUM_OBJECTS > 0);
struct kfence_metadata kfence_metadata[CONFIG_KFENCE_NUM_OBJECTS];
/* Freelist with available objects. */
static struct list_head kfence_freelist = LIST_HEAD_INIT(kfence_freelist);
static DEFINE_RAW_SPINLOCK(kfence_freelist_lock); /* Lock protecting freelist. */
/*
* The static key to set up a KFENCE allocation; or if static keys are not used
* to gate allocations, to avoid a load and compare if KFENCE is disabled.
*/
DEFINE_STATIC_KEY_FALSE(kfence_allocation_key);
/* Gates the allocation, ensuring only one succeeds in a given period. */
atomic_t kfence_allocation_gate = ATOMIC_INIT(1);
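/*
 * Illustrative sketch of how the static key and the gate are consumed on the
 * allocation fast path (the real helper is kfence_alloc() in <linux/kfence.h>;
 * exact branch hints depend on CONFIG_KFENCE_STATIC_KEYS):
 *
 *	if (!static_branch_unlikely(&kfence_allocation_key))
 *		return NULL;
 *	if (likely(atomic_read(&kfence_allocation_gate)))
 *		return NULL;
 *	return __kfence_alloc(s, size, flags);
 *
 * Most allocations bail out on one of the first two checks; only when the
 * timer resets the gate does a single allocation reach __kfence_alloc().
 */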
/*
* A Counting Bloom filter of allocation coverage: limits currently covered
* allocations of the same source filling up the pool.
*
* Assuming a range of 15%-85% unique allocations in the pool at any point in
* time, the below parameters provide a probability of 0.02-0.33 for false
* positive hits respectively:
*
*	P(alloc_traces) = (1 - e^(-HNUM * (alloc_traces / SIZE)))^HNUM
*/
#define ALLOC_COVERED_HNUM 2
#define ALLOC_COVERED_ORDER (const_ilog2(CONFIG_KFENCE_NUM_OBJECTS) + 2)
#define ALLOC_COVERED_SIZE (1 << ALLOC_COVERED_ORDER)
#define ALLOC_COVERED_HNEXT(h) hash_32(h, ALLOC_COVERED_ORDER)
#define ALLOC_COVERED_MASK (ALLOC_COVERED_SIZE - 1)
static atomic_t alloc_covered[ALLOC_COVERED_SIZE];
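/*
 * Worked example (assuming the default CONFIG_KFENCE_NUM_OBJECTS of 255):
 * ALLOC_COVERED_ORDER = 7 + 2 = 9, so ALLOC_COVERED_SIZE = 512. With 85% of
 * the pool unique (~217 traces), P = (1 - e^(-2*217/512))^2 ~= 0.33; with 15%
 * unique (~38 traces), P = (1 - e^(-2*38/512))^2 ~= 0.02, matching the
 * 0.02-0.33 range quoted above.
 */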
/* Stack depth used to determine uniqueness of an allocation. */
#define UNIQUE_ALLOC_STACK_DEPTH ((size_t)8)
/*
* Randomness for stack hashes, making the same collisions across reboots and
* different machines less likely.
*/
static u32 stack_hash_seed __ro_after_init;
/* Statistics counters for debugfs. */
enum kfence_counter_id {
	KFENCE_COUNTER_ALLOCATED,
	KFENCE_COUNTER_ALLOCS,
	KFENCE_COUNTER_FREES,
	KFENCE_COUNTER_ZOMBIES,
	KFENCE_COUNTER_BUGS,
	KFENCE_COUNTER_SKIP_INCOMPAT,
	KFENCE_COUNTER_SKIP_CAPACITY,
	KFENCE_COUNTER_SKIP_COVERED,
	KFENCE_COUNTER_COUNT,
};
static atomic_long_t counters[KFENCE_COUNTER_COUNT];
static const char *const counter_names[] = {
	[KFENCE_COUNTER_ALLOCATED] = "currently allocated",
	[KFENCE_COUNTER_ALLOCS] = "total allocations",
	[KFENCE_COUNTER_FREES] = "total frees",
	[KFENCE_COUNTER_ZOMBIES] = "zombie allocations",
	[KFENCE_COUNTER_BUGS] = "total bugs",
	[KFENCE_COUNTER_SKIP_INCOMPAT] = "skipped allocations (incompatible)",
	[KFENCE_COUNTER_SKIP_CAPACITY] = "skipped allocations (capacity)",
	[KFENCE_COUNTER_SKIP_COVERED] = "skipped allocations (covered)",
};
static_assert(ARRAY_SIZE(counter_names) == KFENCE_COUNTER_COUNT);
/* === Internals ============================================================ */
static inline bool should_skip_covered(void)
{
	unsigned long thresh = (CONFIG_KFENCE_NUM_OBJECTS * kfence_skip_covered_thresh) / 100;

	return atomic_long_read(&counters[KFENCE_COUNTER_ALLOCATED]) > thresh;
}
static u32 get_alloc_stack_hash(unsigned long *stack_entries, size_t num_entries)
{
	num_entries = min(num_entries, UNIQUE_ALLOC_STACK_DEPTH);
	num_entries = filter_irq_stacks(stack_entries, num_entries);
	return jhash(stack_entries, num_entries * sizeof(stack_entries[0]), stack_hash_seed);
}
/*
* Adds (or subtracts) count @val for allocation stack trace hash
* @alloc_stack_hash from Counting Bloom filter.
*/
static void alloc_covered_add(u32 alloc_stack_hash, int val)
{
	int i;

	/* Update each of the HNUM counters selected by successive hashes. */
	for (i = 0; i < ALLOC_COVERED_HNUM; i++) {
		atomic_add(val, &alloc_covered[alloc_stack_hash & ALLOC_COVERED_MASK]);
		alloc_stack_hash = ALLOC_COVERED_HNEXT(alloc_stack_hash);
	}
}
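/*
 * Illustrative sketch of how the helpers above combine on the allocation slow
 * path (__kfence_alloc(); alloc_covered_contains() is the lookup counterpart
 * of alloc_covered_add(), defined further below in the full file):
 *
 *	alloc_stack_hash = get_alloc_stack_hash(stack_entries, num_stack_entries);
 *	if (should_skip_covered() && alloc_covered_contains(alloc_stack_hash)) {
 *		atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_COVERED]);
 *		return NULL;
 *	}
 *
 * Once pool usage exceeds the skip_covered threshold, allocation sites that
 * already have a covered object are skipped in favour of new ones.
 */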