Commit 8073e960 authored by Florian Westphal, committed by Pablo Neira Ayuso
Browse files

netfilter: nat: use keyed locks



There is no need to serialize on a single lock; we can partition the table
and add/delete entries in different slots in parallel.
This restores one of the advantages that got lost with the rhlist
revert.

Cc: Ivan Babrou <ibobrik@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
parent e1bf1687
Loading
Loading
Loading
Loading
+24 −12
Original line number Original line Diff line number Diff line
@@ -30,7 +30,7 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <linux/netfilter/nf_nat.h>
#include <linux/netfilter/nf_nat.h>


static DEFINE_SPINLOCK(nf_nat_lock);
static spinlock_t nf_nat_locks[CONNTRACK_LOCKS];


static DEFINE_MUTEX(nf_nat_proto_mutex);
static DEFINE_MUTEX(nf_nat_proto_mutex);
static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
@@ -425,13 +425,15 @@ nf_nat_setup_info(struct nf_conn *ct,


	if (maniptype == NF_NAT_MANIP_SRC) {
	if (maniptype == NF_NAT_MANIP_SRC) {
		unsigned int srchash;
		unsigned int srchash;
		spinlock_t *lock;


		srchash = hash_by_src(net,
		srchash = hash_by_src(net,
				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
		spin_lock_bh(&nf_nat_lock);
		lock = &nf_nat_locks[srchash % ARRAY_SIZE(nf_nat_locks)];
		spin_lock_bh(lock);
		hlist_add_head_rcu(&ct->nat_bysource,
		hlist_add_head_rcu(&ct->nat_bysource,
				   &nf_nat_bysource[srchash]);
				   &nf_nat_bysource[srchash]);
		spin_unlock_bh(&nf_nat_lock);
		spin_unlock_bh(lock);
	}
	}


	/* It's done. */
	/* It's done. */
@@ -525,6 +527,16 @@ static int nf_nat_proto_remove(struct nf_conn *i, void *data)
	return i->status & IPS_NAT_MASK ? 1 : 0;
	return i->status & IPS_NAT_MASK ? 1 : 0;
}
}


static void __nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
	unsigned int h;

	h = hash_by_src(nf_ct_net(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	spin_lock_bh(&nf_nat_locks[h % ARRAY_SIZE(nf_nat_locks)]);
	hlist_del_rcu(&ct->nat_bysource);
	spin_unlock_bh(&nf_nat_locks[h % ARRAY_SIZE(nf_nat_locks)]);
}

static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
{
{
	if (nf_nat_proto_remove(ct, data))
	if (nf_nat_proto_remove(ct, data))
@@ -540,9 +552,7 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
	 * will delete entry from already-freed table.
	 * will delete entry from already-freed table.
	 */
	 */
	clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
	clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
	spin_lock_bh(&nf_nat_lock);
	__nf_nat_cleanup_conntrack(ct);
	hlist_del_rcu(&ct->nat_bysource);
	spin_unlock_bh(&nf_nat_lock);


	/* don't delete conntrack.  Although that would make things a lot
	/* don't delete conntrack.  Although that would make things a lot
	 * simpler, we'd end up flushing all conntracks on nat rmmod.
	 * simpler, we'd end up flushing all conntracks on nat rmmod.
@@ -670,11 +680,8 @@ EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister);
/* No one using conntrack by the time this called. */
/* No one using conntrack by the time this called. */
static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
{
	if (ct->status & IPS_SRC_NAT_DONE) {
	if (ct->status & IPS_SRC_NAT_DONE)
		spin_lock_bh(&nf_nat_lock);
		__nf_nat_cleanup_conntrack(ct);
		hlist_del_rcu(&ct->nat_bysource);
		spin_unlock_bh(&nf_nat_lock);
	}
}
}


static struct nf_ct_ext_type nat_extend __read_mostly = {
static struct nf_ct_ext_type nat_extend __read_mostly = {
@@ -796,10 +803,12 @@ static struct nf_ct_helper_expectfn follow_master_nat = {


static int __init nf_nat_init(void)
static int __init nf_nat_init(void)
{
{
	int ret;
	int ret, i;


	/* Leave them the same for the moment. */
	/* Leave them the same for the moment. */
	nf_nat_htable_size = nf_conntrack_htable_size;
	nf_nat_htable_size = nf_conntrack_htable_size;
	if (nf_nat_htable_size < ARRAY_SIZE(nf_nat_locks))
		nf_nat_htable_size = ARRAY_SIZE(nf_nat_locks);


	nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
	nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
	if (!nf_nat_bysource)
	if (!nf_nat_bysource)
@@ -812,6 +821,9 @@ static int __init nf_nat_init(void)
		return ret;
		return ret;
	}
	}


	for (i = 0; i < ARRAY_SIZE(nf_nat_locks); i++)
		spin_lock_init(&nf_nat_locks[i]);

	nf_ct_helper_expectfn_register(&follow_master_nat);
	nf_ct_helper_expectfn_register(&follow_master_nat);


	BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
	BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);