xfrm: Add an inbound percpu state cache.

[ Upstream commit 81a331a0e7 ]

Now that we can have percpu xfrm states, the number of active
states might increase. To get a better lookup performance,
we add a percpu cache to cache the used inbound xfrm states.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Tested-by: Antony Antony <antony.antony@secunet.com>
Tested-by: Tobias Brunner <tobias@strongswan.org>
Stable-dep-of: e952837f3d ("xfrm: state: fix out-of-bounds read during lookup")
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
Steffen Klassert 2024-10-23 12:53:44 +02:00 committed by Greg Kroah-Hartman
parent aa48a18fdb
commit 5e4334dc39
6 changed files with 70 additions and 7 deletions

View File

@ -43,6 +43,7 @@ struct netns_xfrm {
struct hlist_head __rcu *state_bysrc;
struct hlist_head __rcu *state_byspi;
struct hlist_head __rcu *state_byseq;
struct hlist_head __percpu *state_cache_input;
unsigned int state_hmask;
unsigned int state_num;
struct work_struct state_hash_work;

View File

@ -185,6 +185,7 @@ struct xfrm_state {
struct hlist_node byspi;
struct hlist_node byseq;
struct hlist_node state_cache;
struct hlist_node state_cache_input;
refcount_t refcnt;
spinlock_t lock;
@ -1650,6 +1651,10 @@ int xfrm_state_update(struct xfrm_state *x);
struct xfrm_state *xfrm_state_lookup(struct net *net, u32 mark,
const xfrm_address_t *daddr, __be32 spi,
u8 proto, unsigned short family);
struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
const xfrm_address_t *daddr,
__be32 spi, u8 proto,
unsigned short family);
struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, u32 mark,
const xfrm_address_t *daddr,
const xfrm_address_t *saddr,

View File

@ -53,9 +53,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
if (sp->len == XFRM_MAX_DEPTH)
goto out_reset;
x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
(xfrm_address_t *)&ip_hdr(skb)->daddr,
spi, IPPROTO_ESP, AF_INET);
x = xfrm_input_state_lookup(dev_net(skb->dev), skb->mark,
(xfrm_address_t *)&ip_hdr(skb)->daddr,
spi, IPPROTO_ESP, AF_INET);
if (unlikely(x && x->dir && x->dir != XFRM_SA_DIR_IN)) {
/* non-offload path will record the error and audit log */

View File

@ -80,9 +80,9 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
if (sp->len == XFRM_MAX_DEPTH)
goto out_reset;
x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
(xfrm_address_t *)&ipv6_hdr(skb)->daddr,
spi, IPPROTO_ESP, AF_INET6);
x = xfrm_input_state_lookup(dev_net(skb->dev), skb->mark,
(xfrm_address_t *)&ipv6_hdr(skb)->daddr,
spi, IPPROTO_ESP, AF_INET6);
if (unlikely(x && x->dir && x->dir != XFRM_SA_DIR_IN)) {
/* non-offload path will record the error and audit log */

View File

@ -572,7 +572,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
goto drop;
}
x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family);
x = xfrm_input_state_lookup(net, mark, daddr, spi, nexthdr, family);
if (x == NULL) {
secpath_reset(skb);
XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);

View File

@ -754,6 +754,9 @@ int __xfrm_state_delete(struct xfrm_state *x)
hlist_del_rcu(&x->byseq);
if (!hlist_unhashed(&x->state_cache))
hlist_del_rcu(&x->state_cache);
if (!hlist_unhashed(&x->state_cache_input))
hlist_del_rcu(&x->state_cache_input);
if (x->id.spi)
hlist_del_rcu(&x->byspi);
net->xfrm.state_num--;
@ -1106,6 +1109,52 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
return NULL;
}
struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
const xfrm_address_t *daddr,
__be32 spi, u8 proto,
unsigned short family)
{
struct hlist_head *state_cache_input;
struct xfrm_state *x = NULL;
int cpu = get_cpu();
state_cache_input = per_cpu_ptr(net->xfrm.state_cache_input, cpu);
rcu_read_lock();
hlist_for_each_entry_rcu(x, state_cache_input, state_cache_input) {
if (x->props.family != family ||
x->id.spi != spi ||
x->id.proto != proto ||
!xfrm_addr_equal(&x->id.daddr, daddr, family))
continue;
if ((mark & x->mark.m) != x->mark.v)
continue;
if (!xfrm_state_hold_rcu(x))
continue;
goto out;
}
x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family);
if (x && x->km.state == XFRM_STATE_VALID) {
spin_lock_bh(&net->xfrm.xfrm_state_lock);
if (hlist_unhashed(&x->state_cache_input)) {
hlist_add_head_rcu(&x->state_cache_input, state_cache_input);
} else {
hlist_del_rcu(&x->state_cache_input);
hlist_add_head_rcu(&x->state_cache_input, state_cache_input);
}
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
}
out:
rcu_read_unlock();
put_cpu();
return x;
}
EXPORT_SYMBOL(xfrm_input_state_lookup);
static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
const xfrm_address_t *daddr,
const xfrm_address_t *saddr,
@ -3079,6 +3128,11 @@ int __net_init xfrm_state_init(struct net *net)
net->xfrm.state_byseq = xfrm_hash_alloc(sz);
if (!net->xfrm.state_byseq)
goto out_byseq;
net->xfrm.state_cache_input = alloc_percpu(struct hlist_head);
if (!net->xfrm.state_cache_input)
goto out_state_cache_input;
net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
net->xfrm.state_num = 0;
@ -3088,6 +3142,8 @@ int __net_init xfrm_state_init(struct net *net)
&net->xfrm.xfrm_state_lock);
return 0;
out_state_cache_input:
xfrm_hash_free(net->xfrm.state_byseq, sz);
out_byseq:
xfrm_hash_free(net->xfrm.state_byspi, sz);
out_byspi:
@ -3117,6 +3173,7 @@ void xfrm_state_fini(struct net *net)
xfrm_hash_free(net->xfrm.state_bysrc, sz);
WARN_ON(!hlist_empty(net->xfrm.state_bydst));
xfrm_hash_free(net->xfrm.state_bydst, sz);
free_percpu(net->xfrm.state_cache_input);
}
#ifdef CONFIG_AUDITSYSCALL