Commit d7a799ec authored by Alexei Starovoitov
Browse files

Merge branch 'bpf: add netfilter program type'



Florian Westphal says:

====================
Changes since last version:
- rework test case in last patch wrt. ctx->skb dereference etc (Alexei)
- pacify bpf ci tests, netfilter program type missed string translation
  in libbpf helper.

This still uses runtime btf walk rather than extending
the btf trace array as Alexei suggested, I would do this later (or someone else can).

v1 cover letter:

Add minimal support to hook bpf programs to netfilter hooks, e.g.
PREROUTING or FORWARD.

For this the most relevant parts for registering a netfilter
hook via the in-kernel api are exposed to userspace via bpf_link.

The new program type is 'tracing style', i.e. there is no context
access rewrite done by verifier, the function argument (struct bpf_nf_ctx)
isn't stable.
There is no support for direct packet access, dynptr api should be used
instead.

With this it's possible to build a small test program such as:

 #include "vmlinux.h"
extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
                               struct bpf_dynptr *ptr__uninit) __ksym;
extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset,
                                   void *buffer, uint32_t buffer__sz) __ksym;
/*
 * Example netfilter program: logs addresses, ports, hook number and input
 * ifindex for a packet, then accepts it. Any failure to obtain the dynptr
 * or either header slice results in NF_DROP.
 */
SEC("netfilter")
int nf_test(struct bpf_nf_ctx *ctx)
{
	struct nf_hook_state *state = ctx->state;
	struct sk_buff *skb = ctx->skb;
	const struct iphdr *iph, _iph;
	const struct tcphdr *th, _th;
	struct bpf_dynptr ptr;

	/* No direct packet access: packet data is reached through a dynptr. */
	if (bpf_dynptr_from_skb(skb, 0, &ptr))
		return NF_DROP;

	/* Slice at offset 0, sizeof(struct iphdr) bytes; _iph is handed in as the buffer. */
	iph = bpf_dynptr_slice(&ptr, 0, &_iph, sizeof(_iph));
	if (!iph)
		return NF_DROP;

	/* Second slice starts at iph->ihl << 2 (ihl * 4); _th is handed in as the buffer. */
	th = bpf_dynptr_slice(&ptr, iph->ihl << 2, &_th, sizeof(_th));
	if (!th)
		return NF_DROP;

	bpf_printk("accept %x:%d->%x:%d, hook %d ifin %d\n",
		   iph->saddr, bpf_ntohs(th->source), iph->daddr,
		   bpf_ntohs(th->dest), state->hook, state->in->ifindex);
        return NF_ACCEPT;
}

Then, tail /sys/kernel/tracing/trace_pipe.

Changes since v3:
- uapi: remove 'reserved' struct member, s/prio/priority (Alexei)
- add ctx access test cases (Alexei, see last patch)
- some arm32 can only handle cmpxchg on u32 (build bot)
- Fix kdoc annotations (Simon Horman)
- bpftool: prefer p_err, not fprintf (Quentin)
- add test cases in separate patch

Changes since v2:
1. don't WARN when user calls 'bpftool link detach' twice
   restrict attachment to ip+ip6 families, let's relax this
   later in case arp/bridge/netdev are needed too.
2. show netfilter links in 'bpftool net' output as well.

Changes since v1:
1. Don't fail to link when CONFIG_NETFILTER=n (build bot)
2. Use test_progs instead of test_verifier (Alexei)

Changes since last RFC version:
1. extend 'bpftool link show' to print prio/hooknum etc
2. extend 'nft list hooks' so it can print the bpf program id
3. Add an extra patch to artificially restrict bpf progs with
   same priority.  It's fine from a technical pov but it will
   cause ordering issues (most recent one comes first).
   Can be removed later.
4. Add test_run support for netfilter prog type and a small
   extension to verifier tests to make sure we can't return
   verdicts like NF_STOLEN.
5. Alter the netfilter part of the bpf_link uapi struct:
   - add flags/reserved members.
  Not used here except returning errors when they are nonzero.
  Plan is to allow the bpf_link users to enable netfilter
  defrag or conntrack engine by setting feature flags at
  link create time in the future.
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 45cea721 006c0e44
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -2264,6 +2264,9 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
				const union bpf_attr *kattr,
				union bpf_attr __user *uattr);
/*
 * NOTE(review): presumably the test-run entry point for netfilter programs;
 * same signature as bpf_prog_test_run_sk_lookup() declared just above.
 * The definition is not visible here -- confirm semantics there.
 */
int bpf_prog_test_run_nf(struct bpf_prog *prog,
			 const union bpf_attr *kattr,
			 union bpf_attr __user *uattr);
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
		    const struct bpf_prog *prog,
		    struct bpf_insn_access_aux *info);
+4 −0
Original line number Diff line number Diff line
@@ -79,6 +79,10 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm,
#endif
BPF_PROG_TYPE(BPF_PROG_TYPE_SYSCALL, bpf_syscall,
	      void *, void *)
/*
 * Netfilter program type: only listed when CONFIG_NETFILTER is set.
 * Both type arguments of the entry are struct bpf_nf_ctx.
 */
#ifdef CONFIG_NETFILTER
BPF_PROG_TYPE(BPF_PROG_TYPE_NETFILTER, netfilter,
	      struct bpf_nf_ctx, struct bpf_nf_ctx)
#endif

BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
+1 −0
Original line number Diff line number Diff line
@@ -80,6 +80,7 @@ typedef unsigned int nf_hookfn(void *priv,
/*
 * NOTE(review): presumably tags what kind of user registered a
 * struct nf_hook_ops -- confirm against the users of this enum.
 * Values are implicit, so enumerator order must not change.
 */
enum nf_hook_ops_type {
	NF_HOOK_OP_UNDEFINED,
	NF_HOOK_OP_NF_TABLES,
	NF_HOOK_OP_BPF,		/* presumably: hook backed by a bpf program -- confirm */
};

struct nf_hook_ops {
+15 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */

/*
 * Context handed to a netfilter bpf program: the hook state (read-only
 * through this pointer) and the packet's sk_buff.
 */
struct bpf_nf_ctx {
	const struct nf_hook_state *state;
	struct sk_buff *skb;
};

/*
 * bpf_nf_link_attach - attach @prog using the link-create attributes in @attr.
 *
 * With CONFIG_NETFILTER_BPF_LINK enabled the real implementation is provided
 * elsewhere (not visible here). Otherwise the inline stub below is used and
 * always returns -EOPNOTSUPP, so callers still build and link.
 */
#if IS_ENABLED(CONFIG_NETFILTER_BPF_LINK)
int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
#else
static inline int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	return -EOPNOTSUPP;
}
#endif
+14 −0
Original line number Diff line number Diff line
@@ -986,6 +986,7 @@ enum bpf_prog_type {
	BPF_PROG_TYPE_LSM,
	BPF_PROG_TYPE_SK_LOOKUP,
	BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
	BPF_PROG_TYPE_NETFILTER,
};

enum bpf_attach_type {
@@ -1050,6 +1051,7 @@ enum bpf_link_type {
	BPF_LINK_TYPE_PERF_EVENT = 7,
	BPF_LINK_TYPE_KPROBE_MULTI = 8,
	BPF_LINK_TYPE_STRUCT_OPS = 9,
	BPF_LINK_TYPE_NETFILTER = 10,

	MAX_BPF_LINK_TYPE,
};
@@ -1560,6 +1562,12 @@ union bpf_attr {
				 */
				__u64		cookie;
			} tracing;
			struct {
				__u32		pf;
				__u32		hooknum;
				__s32		priority;
				__u32		flags;
			} netfilter;
		};
	} link_create;

@@ -6410,6 +6418,12 @@ struct bpf_link_info {
		struct {
			__u32 map_id;
		} struct_ops;
		struct {
			__u32 pf;
			__u32 hooknum;
			__s32 priority;
			__u32 flags;
		} netfilter;
	};
} __attribute__((aligned(8)));

Loading