Merge branch 'net-fib_rules-add-flow-label-selector-support'

Ido Schimmel says:

====================
net: fib_rules: Add flow label selector support

In some deployments users would like to encode path information into
certain bits of the IPv6 flow label, the UDP source port and the DSCP
and use this information to route packets accordingly.

Redirecting traffic to a routing table based on the flow label is not
currently possible with Linux as FIB rules cannot match on it despite
the flow label being available in the IPv6 flow key.

This patchset extends FIB rules to match on the flow label with a mask.
Future patches will add mask attributes to L4 ports and DSCP matches.

Patches #1-#5 gradually extend FIB rules to match on the flow label.

Patches #6-#7 allow user space to specify a flow label in route get
requests. This is useful for both debugging and testing.

Patch #8 adjusts the fib6_table_lookup tracepoint to print the flow
label to the trace buffer for better observability.

Patch #9 extends the FIB rule selftest with flow label test cases while
utilizing the route get functionality from patch #6.
====================

Link: https://patch.msgid.link/20241216171201.274644-1-idosch@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
Paolo Abeni 2024-12-19 16:02:24 +01:00
commit 6b3099ebca
10 changed files with 140 additions and 6 deletions

View File

@ -177,6 +177,11 @@ attribute-sets:
-
name: rta-nh-id
type: u32
-
name: rta-flowlabel
type: u32
byte-order: big-endian
display-hint: hex
-
name: rta-metrics
attributes:
@ -260,6 +265,7 @@ operations:
- rta-dport
- rta-mark
- rta-uid
- rta-flowlabel
reply:
value: 24
attributes: &all-route-attrs
@ -299,6 +305,7 @@ operations:
- rta-sport
- rta-dport
- rta-nh-id
- rta-flowlabel
dump:
request:
value: 26

View File

@ -172,6 +172,16 @@ attribute-sets:
-
name: dscp
type: u8
-
name: flowlabel
type: u32
byte-order: big-endian
display-hint: hex
-
name: flowlabel-mask
type: u32
byte-order: big-endian
display-hint: hex
operations:
enum-model: directional
@ -203,6 +213,8 @@ operations:
- sport-range
- dport-range
- dscp
- flowlabel
- flowlabel-mask
-
name: newrule-ntf
doc: Notify a rule creation

View File

@ -22,6 +22,7 @@ TRACE_EVENT(fib6_table_lookup,
__field( int, err )
__field( int, oif )
__field( int, iif )
__field( u32, flowlabel )
__field( __u8, tos )
__field( __u8, scope )
__field( __u8, flags )
@ -42,6 +43,7 @@ TRACE_EVENT(fib6_table_lookup,
__entry->err = ip6_rt_type_to_error(res->fib6_type);
__entry->oif = flp->flowi6_oif;
__entry->iif = flp->flowi6_iif;
__entry->flowlabel = ntohl(flowi6_get_flowlabel(flp));
__entry->tos = ip6_tclass(flp->flowlabel);
__entry->scope = flp->flowi6_scope;
__entry->flags = flp->flowi6_flags;
@ -76,11 +78,11 @@ TRACE_EVENT(fib6_table_lookup,
}
),
TP_printk("table %3u oif %d iif %d proto %u %pI6c/%u -> %pI6c/%u tos %d scope %d flags %x ==> dev %s gw %pI6c err %d",
TP_printk("table %3u oif %d iif %d proto %u %pI6c/%u -> %pI6c/%u flowlabel %#x tos %d scope %d flags %x ==> dev %s gw %pI6c err %d",
__entry->tb_id, __entry->oif, __entry->iif, __entry->proto,
__entry->src, __entry->sport, __entry->dst, __entry->dport,
__entry->tos, __entry->scope, __entry->flags,
__entry->name, __entry->gw, __entry->err)
__entry->flowlabel, __entry->tos, __entry->scope,
__entry->flags, __entry->name, __entry->gw, __entry->err)
);
#endif /* _TRACE_FIB6_H */

View File

@ -68,6 +68,8 @@ enum {
FRA_SPORT_RANGE, /* sport */
FRA_DPORT_RANGE, /* dport */
FRA_DSCP, /* dscp */
FRA_FLOWLABEL, /* flowlabel */
FRA_FLOWLABEL_MASK, /* flowlabel mask */
__FRA_MAX
};

View File

@ -393,6 +393,7 @@ enum rtattr_type_t {
RTA_SPORT,
RTA_DPORT,
RTA_NH_ID,
RTA_FLOWLABEL,
__RTA_MAX
};

View File

@ -770,6 +770,8 @@ static const struct nla_policy fib_rule_policy[FRA_MAX + 1] = {
[FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) },
[FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) },
[FRA_DSCP] = NLA_POLICY_MAX(NLA_U8, INET_DSCP_MASK >> 2),
[FRA_FLOWLABEL] = { .type = NLA_BE32 },
[FRA_FLOWLABEL_MASK] = { .type = NLA_BE32 },
};
int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,

View File

@ -249,6 +249,12 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
int err = -EINVAL;
struct fib4_rule *rule4 = (struct fib4_rule *) rule;
if (tb[FRA_FLOWLABEL] || tb[FRA_FLOWLABEL_MASK]) {
NL_SET_ERR_MSG(extack,
"Flow label cannot be specified for IPv4 FIB rules");
goto errout;
}
if (!inet_validate_dscp(frh->tos)) {
NL_SET_ERR_MSG(extack,
"Invalid dsfield (tos): ECN bits must be 0");

View File

@ -26,6 +26,8 @@ struct fib6_rule {
struct fib_rule common;
struct rt6key src;
struct rt6key dst;
__be32 flowlabel;
__be32 flowlabel_mask;
dscp_t dscp;
u8 dscp_full:1; /* DSCP or TOS selector */
};
@ -34,7 +36,7 @@ static bool fib6_rule_matchall(const struct fib_rule *rule)
{
struct fib6_rule *r = container_of(rule, struct fib6_rule, common);
if (r->dst.plen || r->src.plen || r->dscp)
if (r->dst.plen || r->src.plen || r->dscp || r->flowlabel_mask)
return false;
return fib_rule_matchall(rule);
}
@ -332,6 +334,9 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule,
if (r->dscp && r->dscp != ip6_dscp(fl6->flowlabel))
return 0;
if ((r->flowlabel ^ flowi6_get_flowlabel(fl6)) & r->flowlabel_mask)
return 0;
if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto))
return 0;
@ -360,6 +365,35 @@ static int fib6_nl2rule_dscp(const struct nlattr *nla, struct fib6_rule *rule6,
return 0;
}
/*
 * Read the flow label selector (FRA_FLOWLABEL / FRA_FLOWLABEL_MASK) from
 * the parsed attribute table @tb and store it in @rule6.
 *
 * Both attributes must be present. The mask may not have bits set outside
 * IPV6_FLOWLABEL_MASK, and the label may not have bits set outside the
 * mask. @rule6 is only written once every check has passed.
 *
 * Returns 0 on success and -EINVAL on any validation failure; the mask
 * and label/mask mismatch failures also set a message in @extack.
 */
static int fib6_nl2rule_flowlabel(struct nlattr **tb, struct fib6_rule *rule6,
				  struct netlink_ext_ack *extack)
{
	__be32 label, mask;

	/* The selector is only meaningful as a (label, mask) pair. */
	if (NL_REQ_ATTR_CHECK(extack, NULL, tb, FRA_FLOWLABEL))
		return -EINVAL;
	if (NL_REQ_ATTR_CHECK(extack, NULL, tb, FRA_FLOWLABEL_MASK))
		return -EINVAL;

	mask = nla_get_be32(tb[FRA_FLOWLABEL_MASK]);
	if (mask & ~IPV6_FLOWLABEL_MASK) {
		NL_SET_ERR_MSG_ATTR(extack, tb[FRA_FLOWLABEL_MASK],
				    "Invalid flow label mask");
		return -EINVAL;
	}

	/* Reject label bits that the mask would never compare. */
	label = nla_get_be32(tb[FRA_FLOWLABEL]);
	if (label & ~mask) {
		NL_SET_ERR_MSG(extack, "Flow label and mask do not match");
		return -EINVAL;
	}

	rule6->flowlabel = label;
	rule6->flowlabel_mask = mask;

	return 0;
}
static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct fib_rule_hdr *frh,
struct nlattr **tb,
@ -379,6 +413,10 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
if (tb[FRA_DSCP] && fib6_nl2rule_dscp(tb[FRA_DSCP], rule6, extack) < 0)
goto errout;
if ((tb[FRA_FLOWLABEL] || tb[FRA_FLOWLABEL_MASK]) &&
fib6_nl2rule_flowlabel(tb, rule6, extack) < 0)
goto errout;
if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) {
if (rule->table == RT6_TABLE_UNSPEC) {
NL_SET_ERR_MSG(extack, "Invalid table");
@ -444,6 +482,14 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
return 0;
}
if (tb[FRA_FLOWLABEL] &&
nla_get_be32(tb[FRA_FLOWLABEL]) != rule6->flowlabel)
return 0;
if (tb[FRA_FLOWLABEL_MASK] &&
nla_get_be32(tb[FRA_FLOWLABEL_MASK]) != rule6->flowlabel_mask)
return 0;
if (frh->src_len &&
nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr)))
return 0;
@ -472,6 +518,11 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
frh->tos = inet_dscp_to_dsfield(rule6->dscp);
}
if (rule6->flowlabel_mask &&
(nla_put_be32(skb, FRA_FLOWLABEL, rule6->flowlabel) ||
nla_put_be32(skb, FRA_FLOWLABEL_MASK, rule6->flowlabel_mask)))
goto nla_put_failure;
if ((rule6->dst.plen &&
nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) ||
(rule6->src.plen &&
@ -487,7 +538,9 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
{
return nla_total_size(16) /* dst */
+ nla_total_size(16) /* src */
+ nla_total_size(1); /* dscp */
+ nla_total_size(1) /* dscp */
+ nla_total_size(4) /* flowlabel */
+ nla_total_size(4); /* flowlabel mask */
}
static void fib6_rule_flush_cache(struct fib_rules_ops *ops)

View File

@ -5005,6 +5005,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_SPORT] = { .type = NLA_U16 },
[RTA_DPORT] = { .type = NLA_U16 },
[RTA_NH_ID] = { .type = NLA_U32 },
[RTA_FLOWLABEL] = { .type = NLA_BE32 },
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@ -5030,6 +5031,12 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout;
}
if (tb[RTA_FLOWLABEL]) {
NL_SET_ERR_MSG_ATTR(extack, tb[RTA_FLOWLABEL],
"Flow label cannot be specified for this operation");
goto errout;
}
*cfg = (struct fib6_config){
.fc_table = rtm->rtm_table,
.fc_dst_len = rtm->rtm_dst_len,
@ -6013,6 +6020,13 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
return -EINVAL;
}
if (tb[RTA_FLOWLABEL] &&
(nla_get_be32(tb[RTA_FLOWLABEL]) & ~IPV6_FLOWLABEL_MASK)) {
NL_SET_ERR_MSG_ATTR(extack, tb[RTA_FLOWLABEL],
"Invalid flow label");
return -EINVAL;
}
for (i = 0; i <= RTA_MAX; i++) {
if (!tb[i])
continue;
@ -6027,6 +6041,7 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
case RTA_SPORT:
case RTA_DPORT:
case RTA_IP_PROTO:
case RTA_FLOWLABEL:
break;
default:
NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
@ -6049,6 +6064,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct sk_buff *skb;
struct rtmsg *rtm;
struct flowi6 fl6 = {};
__be32 flowlabel;
bool fibmatch;
err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
@ -6057,7 +6073,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
err = -EINVAL;
rtm = nlmsg_data(nlh);
fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
if (tb[RTA_SRC]) {
@ -6103,6 +6118,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
goto errout;
}
flowlabel = nla_get_be32_default(tb[RTA_FLOWLABEL], 0);
fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, flowlabel);
if (iif) {
struct net_device *dev;
int flags = 0;

View File

@ -291,6 +291,37 @@ fib_rule6_test()
"$getnomatch" "iif dscp redirect to table" \
"iif dscp no redirect to table"
fi
fib_check_iproute_support "flowlabel" "flowlabel"
if [ $? -eq 0 ]; then
match="flowlabel 0xfffff"
getmatch="flowlabel 0xfffff"
getnomatch="flowlabel 0xf"
fib_rule6_test_match_n_redirect "$match" "$getmatch" \
"$getnomatch" "flowlabel redirect to table" \
"flowlabel no redirect to table"
match="flowlabel 0xfffff"
getmatch="from $SRC_IP6 iif $DEV flowlabel 0xfffff"
getnomatch="from $SRC_IP6 iif $DEV flowlabel 0xf"
fib_rule6_test_match_n_redirect "$match" "$getmatch" \
"$getnomatch" "iif flowlabel redirect to table" \
"iif flowlabel no redirect to table"
match="flowlabel 0x08000/0x08000"
getmatch="flowlabel 0xfffff"
getnomatch="flowlabel 0xf7fff"
fib_rule6_test_match_n_redirect "$match" "$getmatch" \
"$getnomatch" "flowlabel masked redirect to table" \
"flowlabel masked no redirect to table"
match="flowlabel 0x08000/0x08000"
getmatch="from $SRC_IP6 iif $DEV flowlabel 0xfffff"
getnomatch="from $SRC_IP6 iif $DEV flowlabel 0xf7fff"
fib_rule6_test_match_n_redirect "$match" "$getmatch" \
"$getnomatch" "iif flowlabel masked redirect to table" \
"iif flowlabel masked no redirect to table"
fi
}
fib_rule6_vrf_test()