Flower Classifier mellanox   flow_indr_block_cb_register                   TC_SETUP_BLOCK TC_SETUP_CLSFLOWER fl_classify tcf_proto_ops   cls_fl_ops cls_bpf  bpf action Linux flow offload提高路由转发效率

7.TC routine: htb

    qdisc_enqueue_root(sch_generic.h) -> qdisc_enqueue(sch_generic.h) -> htb_enqueue(sch_htb.c) ->htb_classify(sch_htb.c) -> flow_classify(cls_flow.c) -> tcf_exts_exec(pkt_cls.h) ->tcf_action_exec(act_api.c) -> tcf_act_police(act_police.c)

drivers/net/ethernet/mellanox/mlx5/core/en_rep.c:837:   err = __flow_indr_block_cb_register(netdev, rpriv,
include/net/flow_offload.h:396:int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
include/net/flow_offload.h:404:int flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,

Flower Classifier
mellanox   flow_indr_block_cb_register
 
 
 
 
 
 
 
 
 
TC_SETUP_BLOCK TC_SETUP_CLSFLOWER
fl_classify
tcf_proto_ops   cls_fl_ops
cls_bpf
 bpf action
Linux flow offload提高路由转发效率

Flower Classifier
mellanox   flow_indr_block_cb_register
 
 
 
 
 
 
 
 
 
TC_SETUP_BLOCK TC_SETUP_CLSFLOWER
fl_classify
tcf_proto_ops   cls_fl_ops
cls_bpf
 bpf action
Linux flow offload提高路由转发效率

TC Flower supports three operating modes: skip_sw, skip_hw, and unspecified (neither flag set).

In the skip_sw mode, TC Flower tries to offload the rule to the NIC driver. If this operation fails, the rule is not added.

In the skip_hw mode, TC Flower completely ignores the underlying hardware and installs the rule in software only.

If we do not specify the operating mode, TC Flower first tries to put the rule in hardware and if the operation fails tries to allocate it in software.

               if (f && !tc_skip_sw(f->flags)) {   //硬件offload不调用
                        *res = f->res;
                        return tcf_exts_exec(skb, &f->exts, res);
                }
/*
 * tc_skip_hw - test whether a classifier rule carries the SKIP_HW flag.
 * @flags: classifier flag word of the rule
 *
 * Returns true when TCA_CLS_FLAGS_SKIP_HW is set in @flags, false otherwise.
 */
static inline bool tc_skip_hw(u32 flags)
{
        return (flags & TCA_CLS_FLAGS_SKIP_HW) != 0;
}

/*
 * tc_skip_sw - test whether a classifier rule carries the SKIP_SW flag.
 * @flags: classifier flag word of the rule
 *
 * Returns true when TCA_CLS_FLAGS_SKIP_SW is set in @flags, false otherwise.
 */
static inline bool tc_skip_sw(u32 flags)
{
        return (flags & TCA_CLS_FLAGS_SKIP_SW) != 0;
}
net/sched/cls_bpf.c:422:        ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, true,
net/sched/cls_flower.c没有调用tcf_exts_validate
 tcf_exts_validate
     tcf_action_init
           tcf_action_init_1

 Flower Classifier
mellanox   flow_indr_block_cb_register
 
 
 
 
 
 
 
 
 
TC_SETUP_BLOCK TC_SETUP_CLSFLOWER
fl_classify
tcf_proto_ops   cls_fl_ops
cls_bpf
 bpf action
Linux flow offload提高路由转发效率

Flower Classifier
mellanox   flow_indr_block_cb_register
 
 
 
 
 
 
 
 
 
TC_SETUP_BLOCK TC_SETUP_CLSFLOWER
fl_classify
tcf_proto_ops   cls_fl_ops
cls_bpf
 bpf action
Linux flow offload提高路由转发效率

 Flower Classifier
mellanox   flow_indr_block_cb_register
 
 
 
 
 
 
 
 
 
TC_SETUP_BLOCK TC_SETUP_CLSFLOWER
fl_classify
tcf_proto_ops   cls_fl_ops
cls_bpf
 bpf action
Linux flow offload提高路由转发效率

Flower Classifier
mellanox   flow_indr_block_cb_register
 
 
 
 
 
 
 
 
 
TC_SETUP_BLOCK TC_SETUP_CLSFLOWER
fl_classify
tcf_proto_ops   cls_fl_ops
cls_bpf
 bpf action
Linux flow offload提高路由转发效率

 Flower Classifier
mellanox   flow_indr_block_cb_register
 
 
 
 
 
 
 
 
 
TC_SETUP_BLOCK TC_SETUP_CLSFLOWER
fl_classify
tcf_proto_ops   cls_fl_ops
cls_bpf
 bpf action
Linux flow offload提高路由转发效率

 Flower Classifier
mellanox   flow_indr_block_cb_register
 
 
 
 
 
 
 
 
 
TC_SETUP_BLOCK TC_SETUP_CLSFLOWER
fl_classify
tcf_proto_ops   cls_fl_ops
cls_bpf
 bpf action
Linux flow offload提高路由转发效率

 Flower Classifier
mellanox   flow_indr_block_cb_register
 
 
 
 
 
 
 
 
 
TC_SETUP_BLOCK TC_SETUP_CLSFLOWER
fl_classify
tcf_proto_ops   cls_fl_ops
cls_bpf
 bpf action
Linux flow offload提高路由转发效率

Flower Classifier
mellanox   flow_indr_block_cb_register
 
 
 
 
 
 
 
 
 
TC_SETUP_BLOCK TC_SETUP_CLSFLOWER
fl_classify
tcf_proto_ops   cls_fl_ops
cls_bpf
 bpf action
Linux flow offload提高路由转发效率

Flower Classifier
mellanox   flow_indr_block_cb_register
 
 
 
 
 
 
 
 
 
TC_SETUP_BLOCK TC_SETUP_CLSFLOWER
fl_classify
tcf_proto_ops   cls_fl_ops
cls_bpf
 bpf action
Linux flow offload提高路由转发效率

Flower Classifier
mellanox   flow_indr_block_cb_register
 
 
 
 
 
 
 
 
 
TC_SETUP_BLOCK TC_SETUP_CLSFLOWER
fl_classify
tcf_proto_ops   cls_fl_ops
cls_bpf
 bpf action
Linux flow offload提高路由转发效率

 

Flower Classifier
mellanox   flow_indr_block_cb_register
 
 
 
 
 
 
 
 
 
TC_SETUP_BLOCK TC_SETUP_CLSFLOWER
fl_classify
tcf_proto_ops   cls_fl_ops
cls_bpf
 bpf action
Linux flow offload提高路由转发效率

 

 

 

Flower Classifier
mellanox   flow_indr_block_cb_register
 
 
 
 
 
 
 
 
 
TC_SETUP_BLOCK TC_SETUP_CLSFLOWER
fl_classify
tcf_proto_ops   cls_fl_ops
cls_bpf
 bpf action
Linux flow offload提高路由转发效率

 

 

 

 

 

TC_SETUP_BLOCK TC_SETUP_CLSFLOWER

 err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
 tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower,
                              skip_sw, &f->flags, &f->in_hw_count, rtnl_held);

struct tcf_exts

/*
 * struct tcf_exts - per-filter extension state (the actions attached to a
 * classifier rule).
 *
 * The action bookkeeping members only exist when CONFIG_NET_CLS_ACT is
 * enabled; @action and @police are always present.
 */
struct tcf_exts {
#ifdef CONFIG_NET_CLS_ACT
    __u32    type; /* for backward compat(TCA_OLD_COMPAT) */
    int nr_actions;             /* number of valid entries in @actions */
    struct tc_action **actions; /* array of attached actions */
    struct net *net;            /* namespace recorded by tcf_exts_init() */
#endif
    /* Map to export classifier specific extension TLV types to the
     * generic extensions API. Unsupported extensions must be set to 0.
     */
    int action;
    int police;
};
/* One action instance; instances are chained through @next. */
struct tc_action {
// private data
 void   *priv;
// operations structure for this action
 struct tc_action_ops *ops;
// type
 __u32   type; /* for backward compat(TCA_OLD_COMPAT) */
// order
 __u32   order;
// next entry in the action list
 struct tc_action *next;
};
#define TCA_CAP_NONE 0
// Action operations structure: it defines the target operation. Typically
// each action kind is described by one static tc_action_ops structure that is
// provided by a kernel module and registered on the system list at init time.
struct tc_action_ops {
// next entry in the list
 struct tc_action_ops *next;
 struct tcf_hashinfo *hinfo;
// name
 char    kind[IFNAMSIZ];
 __u32   type; /* TBD to match kind */
 __u32  capab;  /* capabilities includes 4 bit version */
 struct module  *owner;
// perform the action
 int     (*act)(struct sk_buff *, struct tc_action *, struct tcf_result *);
// fetch statistics
 int     (*get_stats)(struct sk_buff *, struct tc_action *);
// dump (output)
 int     (*dump)(struct sk_buff *, struct tc_action *, int, int);
// cleanup
 int     (*cleanup)(struct tc_action *, int bind);
// lookup
 int     (*lookup)(struct tc_action *, u32);
// initialization
 int     (*init)(struct rtattr *, struct rtattr *, struct tc_action *, int , int);
// walk (iterate)
 int     (*walk)(struct sk_buff *, struct netlink_callback *, int, struct tc_action *);
};
/*
net/sched/cls_flower.c               Flower classifier
 *
 * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */
  11
linux/kernel.h>
linux/init.h>
linux/module.h>
linux/rhashtable.h>
  16
linux/if_ether.h>
linux/in6.h>
linux/ip.h>
  20
net/sch_generic.h>
net/pkt_cls.h>
net/ip.h>
net/flow_dissector.h>
  25
fl_flow_key {
indev_ifindex;
control;
basic;
eth;
ipaddrs;
  32        union {
ipv4;
ipv6;
  35        };
tp;
/* Ensure that we can do comparisons as longs. */
  38
fl_flow_mask_range {
start;
end;
  42};
  43
fl_flow_mask {
key;
range;
rcu;
  48};
  49
cls_fl_head {
ht;
mask;
dissector;
hgen;
mask_assigned;
filters;
ht_params;
rcu;
  59};
  60
cls_fl_filter {
ht_node;
mkey;
exts;
res;
key;
list;
handle;
flags;
rcu;
  71};
  72
mask)
  74{
start;
  76}
  77
mask)
  79{
key;
key);
size - 1;
  83
i++) {
i]) {
i)
i;
i;
  89                }
  90        }
first, sizeof(long));
last + 1, sizeof(long));
  93}
  94
key,
mask)
  97{
start;
  99}
 100
key,
mask)
 103{
mask);
mask);
mask);
i;
 108
i += sizeof(long))
lmask++;
 111}
 112
key,
mask)
 115{
mask));
 117}
 118

 tcf_exts_exec

  • TCA_CLS_FLAGS_SKIP_HW:只在软件(系统内核TC模块)添加规则,不在硬件添加。如果规则不能添加则报错。
  • TCA_CLS_FLAGS_SKIP_SW:只在硬件(规则挂载的网卡)添加规则,不在软件添加。如果规则不能添加则报错。
  • 默认(不带标志位):尝试同时在硬件和软件下载规则,如果规则不能在软件添加则报错。

通过TC命令查看规则,如果规则已经卸载到硬件了,可以看到 in_hw标志位。


tp,
res)
 121{
root);
f;
skb_key;
skb_mkey;
 126
nelems))
 128                return -1;
 129
mask);
skb_iif;
/* skb_flow_dissect() does not set n_proto in case an unknown protocol,
         * so do it rather here.
         */
protocol;
skb_key, 0);
 137
mask);
 139
ht,
mask),
ht_params);
flags)) {
res;
res);
 146        }
 147        return -1;
 148}

/*
 * tcf_exts_exec - run the actions attached to a matched filter.
 * @skb:  packet being classified
 * @exts: extension state of the matched filter
 * @res:  classification result handed on to the actions
 *
 * Returns the verdict of tcf_action_exec() when at least one action is
 * attached. Returns 0 when the filter has no actions, or when action support
 * (CONFIG_NET_CLS_ACT) is compiled out - the nr_actions/actions members only
 * exist under that option, so they must not be touched otherwise.
 */
static inline int
tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
           struct tcf_result *res)
{
#ifdef CONFIG_NET_CLS_ACT
    if (exts->nr_actions)
        return tcf_action_exec(skb, exts->actions, exts->nr_actions,
                       res);
#endif
    return 0;
}

/*
 * tcf_action_exec - execute an array of actions on a packet.
 * @skb:        packet to act on
 * @actions:    array of action pointers, executed in index order
 * @nr_actions: number of entries in @actions
 * @res:        classification result passed to every action
 *
 * If TC_NCLS is set in skb->tc_verd the bit is cleared and TC_ACT_OK is
 * returned without running any action. Otherwise each action's ->act()
 * callback is invoked; an action is re-run for as long as it returns
 * TC_ACT_REPEAT, and the chain stops at the first verdict other than
 * TC_ACT_PIPE.
 *
 * Returns the last verdict obtained, or -1 when @nr_actions is not positive.
 */
int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
            int nr_actions, struct tcf_result *res)
{
    int verdict = -1;
    int idx;

    if (skb->tc_verd & TC_NCLS) {
        skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
        return TC_ACT_OK;
    }

    for (idx = 0; idx < nr_actions; idx++) {
        const struct tc_action *act = actions[idx];

        /* we need a ttl - JHS */
        do {
            verdict = act->ops->act(skb, act, res);
        } while (verdict == TC_ACT_REPEAT);

        if (verdict != TC_ACT_PIPE)
            break;
    }

    return verdict;
}
net/sched/cls_flower.c  初始化

tcf_exts_init(&fnew->exts, net, TCA_FLOWER_ACT, 0);

General information

The Linux kernel configuration item CONFIG_MLX5_CLS_ACT:

当前动作为police,对应的回调为tcf_act_police,下面单独分析

tcf_exts_init(&fnew->exts, net, TCA_FLOWER_ACT, 0);
net/sched/cls_flow.c:441:       err = tcf_exts_init(&fnew->exts, net, TCA_FLOW_ACT, TCA_FLOW_POLICE);
net/sched/cls_flower.c:1577:    err = tcf_exts_init(&fnew->exts, net, TCA_FLOWER_ACT, 0);
/*
 * tcf_exts_init - initialise the extension state of a new filter.
 * @exts:   extension state to initialise
 * @net:    network namespace stored in @exts
 * @action: classifier-specific TLV type for actions
 * @police: classifier-specific TLV type for police
 *
 * With CONFIG_NET_CLS_ACT the action array is allocated zero-filled with
 * TCA_ACT_MAX_PRIO slots and nr_actions starts at 0, so tcf_exts_exec() will
 * not call tcf_action_exec() until actions are attached.
 *
 * Returns 0 on success, or -ENOMEM if the action array cannot be allocated
 * (in which case @action and @police are not stored).
 */
static inline int tcf_exts_init(struct tcf_exts *exts, struct net *net,
                                int action, int police)
{
#ifdef CONFIG_NET_CLS_ACT
        struct tc_action **slots;

        exts->type = 0;
        exts->nr_actions = 0;
        exts->net = net;

        slots = kcalloc(TCA_ACT_MAX_PRIO, sizeof(*slots), GFP_KERNEL);
        exts->actions = slots;
        if (!slots)
                return -ENOMEM;
#endif
        exts->action = action;
        exts->police = police;
        return 0;
}
Flower Classifier
mellanox   flow_indr_block_cb_register
 
 
 
 
 
 
 
 
 
TC_SETUP_BLOCK TC_SETUP_CLSFLOWER
fl_classify
tcf_proto_ops   cls_fl_ops
cls_bpf
 bpf action
Linux flow offload提高路由转发效率

fl_classify

 tcf_classify -->  fl_classify

static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
                       struct tcf_result *res)
{
        struct cls_fl_head *head = rcu_dereference_bh(tp->root);
        struct fl_flow_key skb_mkey;
        struct fl_flow_key skb_key;
        struct fl_flow_mask *mask;
        struct cls_fl_filter *f;

        list_for_each_entry_rcu(mask, &head->masks, list) {
                flow_dissector_init_keys(&skb_key.control, &skb_key.basic);
                fl_clear_masked_range(&skb_key, mask);

                skb_flow_dissect_meta(skb, &mask->dissector, &skb_key);
                /* skb_flow_dissect() does not set n_proto in case an unknown
                 * protocol, so do it rather here.
                 */
                skb_key.basic.n_proto = skb_protocol(skb, false);
                skb_flow_dissect_tunnel_info(skb, &mask->dissector, &skb_key);
                skb_flow_dissect_ct(skb, &mask->dissector, &skb_key,
                                    fl_ct_info_to_flower_map,
                                    ARRAY_SIZE(fl_ct_info_to_flower_map));
                skb_flow_dissect(skb, &mask->dissector, &skb_key, 0);

                fl_set_masked_key(&skb_mkey, &skb_key, mask);

                f = fl_lookup(mask, &skb_mkey, &skb_key);
                if (f && !tc_skip_sw(f->flags)) {
                        *res = f->res;
                        return tcf_exts_exec(skb, &f->exts, res);
                }
        }
        return -1;
}
/*
 * tcf_proto_create - allocate and initialise a classifier instance.
 * @kind:      classifier kind name used to look up its ops
 * @protocol:  protocol stored in the new instance
 * @prio:      priority stored in the new instance
 * @chain:     chain the instance is associated with
 * @rtnl_held: forwarded to tcf_proto_lookup_ops()
 * @extack:    forwarded to tcf_proto_lookup_ops()
 *
 * Returns the new tcf_proto with a reference count of 1, or an ERR_PTR():
 * -ENOBUFS if allocation fails, the lookup error if the ops cannot be found,
 * or the error returned by the classifier's ->init() callback.
 */
static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
                                          u32 prio, struct tcf_chain *chain,
                                          bool rtnl_held,
                                          struct netlink_ext_ack *extack)
{
        struct tcf_proto *tp = kzalloc(sizeof(*tp), GFP_KERNEL);
        int err;

        if (!tp)
                return ERR_PTR(-ENOBUFS);

        tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack);
        if (IS_ERR(tp->ops)) {
                err = PTR_ERR(tp->ops);
                goto err_free;
        }

        spin_lock_init(&tp->lock);
        refcount_set(&tp->refcnt, 1);
        tp->chain = chain;
        tp->prio = prio;
        tp->protocol = protocol;
        tp->classify = tp->ops->classify;

        err = tp->ops->init(tp);
        if (!err)
                return tp;

        /* ->init() failed: release the owner module before freeing tp. */
        module_put(tp->ops->owner);
err_free:
        kfree(tp);
        return ERR_PTR(err);
}
Flower Classifier
mellanox   flow_indr_block_cb_register
 
 
 
 
 
 
 
 
 
TC_SETUP_BLOCK TC_SETUP_CLSFLOWER
fl_classify
tcf_proto_ops   cls_fl_ops
cls_bpf
 bpf action
Linux flow offload提高路由转发效率



149 tp) 151{ head; 153 GFP_KERNEL); head) ENOBUFS; 157 filters); head); 160 161 return 0; 162} 163 head) 165{ rcu); 167 exts); f); 170} 171 cookie) 173{ dev; offload = {0}; tc; 177 tp, 0)) 179 return; 180 TC_CLSFLOWER_DESTROY; cookie; 183 TC_SETUP_CLSFLOWER; offload; 186 tc); 188} 189 tp, dissector, mask, key, actions, flags) 196{ dev; offload = {0}; tc; err; 201 flags)) EINVAL : 0; 204 TC_CLSFLOWER_REPLACE; cookie; dissector; mask; key; actions; 211 TC_SETUP_CLSFLOWER; offload; 214 tc
);
216 flags)) err; 219 220 return 0; 221} 222 f) 224{ dev; offload = {0}; tc; 228 tp, 0)) 230 return; 231 TC_CLSFLOWER_STATS; f; exts; 235 TC_SETUP_CLSFLOWER; offload; 238 tc); 240} 241 force) 243{ root); next; 246 filters)) false; 249 list) { f); list); fl_destroy_filter); 254 } NULL); mask_assigned) ht); rcu); true; 260} 261 handle) 263{ root); f; 266 list) handle) f; 270 return 0; 271} 272 TCA_FLOWER_MAX + 1] = { NLA_UNSPEC }, NLA_U32 }, NLA_STRING, IFNAMSIZ }, ETH_ALEN }, ETH_ALEN }, ETH_ALEN }, ETH_ALEN }, NLA_U16 }, NLA_U8 }, NLA_U32 }, NLA_U32 }, NLA_U32 }, NLA_U32 }, in6_addr) }, in6_addr) }, in6_addr) }, in6_addr) }, NLA_U16 }, NLA_U16 }, NLA_U16 }, NLA_U16 }, 296}; 297 tb, val_type, len) 301{ val_type]) 303 return; len); mask_type]) len); 307 else len); 309} 310 tb, mask) 313{ CONFIG_NET_CLS_IND TCA_FLOWER_INDEV]) { TCA_FLOWER_INDEV]); err < 0) err; err; indev_ifindex = 0xffffffff; 321 } 322#endif 323 TCA_FLOWER_KEY_ETH_DST, TCA_FLOWER_KEY_ETH_DST_MASK, dst)); TCA_FLOWER_KEY_ETH_SRC, TCA_FLOWER_KEY_ETH_SRC_MASK, src)); 330 TCA_FLOWER_KEY_ETH_TYPE, TCA_FLOWER_UNSPEC, n_proto)); 334 ETH_P_IP) || ETH_P_IPV6)) { TCA_FLOWER_KEY_IP_PROTO, TCA_FLOWER_UNSPEC, ip_proto)); 340 } 341 TCA_FLOWER_KEY_IPV4_DST]) { FLOW_DISSECTOR_KEY_IPV4_ADDRS; TCA_FLOWER_KEY_IPV4_SRC, TCA_FLOWER_KEY_IPV4_SRC_MASK, src)); TCA_FLOWER_KEY_IPV4_DST, TCA_FLOWER_KEY_IPV4_DST_MASK, dst)); TCA_FLOWER_KEY_IPV6_DST]) { FLOW_DISSECTOR_KEY_IPV6_ADDRS; TCA_FLOWER_KEY_IPV6_SRC, TCA_FLOWER_KEY_IPV6_SRC_MASK, src)); TCA_FLOWER_KEY_IPV6_DST, TCA_FLOWER_KEY_IPV6_DST_MASK, dst)); 358 } 359 IPPROTO_TCP) { TCA_FLOWER_KEY_TCP_SRC, TCA_FLOWER_UNSPEC, src)); TCA_FLOWER_KEY_TCP_DST, TCA_FLOWER_UNSPEC, dst)); IPPROTO_UDP) { TCA_FLOWER_KEY_UDP_SRC, TCA_FLOWER_UNSPEC, src)); TCA_FLOWER_KEY_UDP_DST, TCA_FLOWER_UNSPEC, dst)); 374 } 375 376 return 0; 377} 378 mask1, mask2) 381{ mask1); mask2); 384 range)) && mask1)); 387} 388 fl_ht_params = { /* base offset */ ht_node), true, 393}; 394 head, mask) 397{ fl_ht_params; 
mask); start; 401 ht_params); 403} 404 member) member)) member) member)) 409 member) end && start) 413 member) 415 do { id; member); cnt++; 419 } while(0); 420 member) 422 do { member)) member); 425 } while(0); 426 head, mask) 429{ FLOW_DISSECTOR_KEY_MAX]; cnt = 0; 432 control); basic); cnt, eth); cnt, ipv4); cnt, ipv6); cnt, tp); 443 cnt); 445} 446 head, mask) 449{ err; 451 mask_assigned) { mask)) EINVAL; 455 else 456 return 0; 457 } 458 /* Mask is not assigned yet. So assign it and init hashtable * according to that. */ mask); err) err; mask)); true; 467 mask); 469 470 return 0; 471} 472 tp, mask, tb, ovr) 477{ e; err; 480 TCA_FLOWER_ACT, 0); ovr); err < 0) err; 485 TCA_FLOWER_CLASSID]) { TCA_FLOWER_CLASSID]); base); 489 } 490 key); err) errout; 494 mask); mask); 497 e); 499 500 return 0; errout: e); err; 504} 505 tp, head) 508{ i = 0x80000000; handle; 511 512 do { hgen == 0x7FFFFFFF) hgen = 1; hgen)); 516 i == 0)) { handle = 0; 520 } else { hgen; 522 } 523 handle; 525} 526 in_skb, base, tca, ovr) 531{ root); arg; fnew; TCA_FLOWER_MAX + 1]; mask = {}; err; 538 TCA_OPTIONS]) EINVAL; 541 fl_policy); err < 0) err; 545 handle) EINVAL; 548 GFP_KERNEL); fnew) ENOBUFS; 552 TCA_FLOWER_ACT, 0); 554 handle) { head); handle) { EINVAL; errout; 560 } 561 } handle; 563 TCA_FLOWER_FLAGS]) { TCA_FLOWER_FLAGS]); 566 flags)) { EINVAL; errout; 570 } 571 } 572 ovr); err) errout; 576 mask); err) errout; 580 flags)) { ht_node, ht_params); err) errout; 586 } 587 //调用硬件offload tp
, dissector
, key
, key
, exts
, fnew
, flags
);
err) errout; 597 fold) { ht_node, ht_params); fold); 602 } 603 fnew; 605 fold) { list); res); fl_destroy_filter); 610 } else { filters); 612 } 613 614 return 0; 615 errout: fnew); err; 619} 620 arg) 622{ root); arg; 625 ht_node, ht_params); list); f); res); fl_destroy_filter); 632 return 0; 633} 634 arg) 636{ root); f; 639 list) { skip) skip; arg) < 0) { stop = 1; 645 break; 646 } skip: count++; 649 } 650} 651 skb, val_type, len) 655{ err; 657 len)) 659 return 0; val); err) err; TCA_FLOWER_UNSPEC) { mask); err) err; 667 } 668 return 0; 669} 670 fh, t) 673{ root); fh; nest; mask; 678 f) len; 681 handle; 683 TCA_OPTIONS); nest) nla_put_failure; 687 classid && classid)) nla_put_failure; 691 key; key; 694 indev_ifindex) { dev; 697 indev_ifindex); name)) nla_put_failure; 701 } 702 f); 704 TCA_FLOWER_KEY_ETH_DST, TCA_FLOWER_KEY_ETH_DST_MASK, dst)) || TCA_FLOWER_KEY_ETH_SRC, TCA_FLOWER_KEY_ETH_SRC_MASK, src)) || TCA_FLOWER_KEY_ETH_TYPE, TCA_FLOWER_UNSPEC, n_proto))) nla_put_failure; ETH_P_IP) || ETH_P_IPV6)) && TCA_FLOWER_KEY_IP_PROTO, TCA_FLOWER_UNSPEC, ip_proto))) nla_put_failure; 721 FLOW_DISSECTOR_KEY_IPV4_ADDRS && TCA_FLOWER_KEY_IPV4_SRC, TCA_FLOWER_KEY_IPV4_SRC_MASK, src)) || TCA_FLOWER_KEY_IPV4_DST, TCA_FLOWER_KEY_IPV4_DST_MASK, dst)))) nla_put_failure; FLOW_DISSECTOR_KEY_IPV6_ADDRS && TCA_FLOWER_KEY_IPV6_SRC, TCA_FLOWER_KEY_IPV6_SRC_MASK, src)) || TCA_FLOWER_KEY_IPV6_DST, TCA_FLOWER_KEY_IPV6_DST_MASK, dst)))) nla_put_failure; 738 IPPROTO_TCP && TCA_FLOWER_KEY_TCP_SRC, TCA_FLOWER_UNSPEC, src)) || TCA_FLOWER_KEY_TCP_DST, TCA_FLOWER_UNSPEC, dst)))) nla_put_failure; IPPROTO_UDP && TCA_FLOWER_KEY_UDP_SRC, TCA_FLOWER_UNSPEC, src)) || TCA_FLOWER_KEY_UDP_DST, TCA_FLOWER_UNSPEC, dst)))) nla_put_failure; 755 flags); 757 exts)) nla_put_failure; 760 nest); 762 exts) < 0) nla_put_failure; 765 len; 767 nla_put_failure: nest); 770 return -1; 771} 772

tcf_proto_ops   cls_fl_ops


__read_mostly = { fl_classify, fl_init, fl_destroy, fl_get, fl_change, fl_delete, fl_walk, fl_dump, THIS_MODULE, 784}; 785 cls_fl_init(void) 787{ cls_fl_ops); 789} 790 cls_fl_exit(void) 792{ cls_fl_ops); 794} 795 cls_fl_init); cls_fl_exit); 798 802

cls_bpf



/* Classifier operations table for the "bpf" classifier kind. */
static struct tcf_proto_ops cls_bpf_ops __read_mostly = {
    /* identity */
    .kind       = "bpf",
    .owner      = THIS_MODULE,

    /* fast path */
    .classify   = cls_bpf_classify,

    /* lifetime of the classifier instance */
    .init       = cls_bpf_init,
    .destroy    = cls_bpf_destroy,

    /* filter management */
    .get        = cls_bpf_get,
    .change     = cls_bpf_change,
    .delete     = cls_bpf_delete,
    .bind_class = cls_bpf_bind_class,

    /* iteration and dumping */
    .walk       = cls_bpf_walk,
    .dump       = cls_bpf_dump,
};
 
/*
 * cls_bpf_change - create a bpf classifier program or replace an existing one.
 * @net:    network namespace of the request
 * @in_skb: netlink request skb (not used in this function)
 * @tp:     classifier instance the program belongs to
 * @base:   forwarded to cls_bpf_set_parms()
 * @handle: requested handle, or 0 to have one allocated
 * @tca:    top-level netlink attributes; TCA_OPTIONS is mandatory
 * @arg:    in: existing program to replace (or NULL); out: the new program
 * @ovr:    forwarded to cls_bpf_set_parms()
 *
 * Returns 0 on success. Returns -EINVAL for missing options, a handle that
 * does not match the program being replaced, or a zero handle; -ENOBUFS when
 * the program cannot be allocated; otherwise the error reported by attribute
 * parsing, tcf_exts_init(), cls_bpf_set_parms() or cls_bpf_offload().
 */
static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
              struct tcf_proto *tp, unsigned long base,
              u32 handle, struct nlattr **tca,
              void **arg, bool ovr)
{
    struct cls_bpf_head *head = rtnl_dereference(tp->root);
    struct cls_bpf_prog *oldprog = *arg;
    struct nlattr *tb[TCA_BPF_MAX + 1];
    struct cls_bpf_prog *prog;
    int ret;

    if (tca[TCA_OPTIONS] == NULL)
        return -EINVAL;

    ret = nla_parse_nested(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], bpf_policy,
                   NULL);
    if (ret < 0)
        return ret;

    prog = kzalloc(sizeof(*prog), GFP_KERNEL);
    if (!prog)
        return -ENOBUFS;

    /* tcf_exts_init() takes the namespace as its second argument. */
    ret = tcf_exts_init(&prog->exts, net, TCA_BPF_ACT, TCA_BPF_POLICE);
    if (ret < 0)
        goto errout;

    /* A replacement may only keep the handle of the program it replaces. */
    if (oldprog) {
        if (handle && oldprog->handle != handle) {
            ret = -EINVAL;
            goto errout;
        }
    }

    if (handle == 0)
        prog->handle = cls_bpf_grab_new_handle(tp, head);
    else
        prog->handle = handle;
    if (prog->handle == 0) {
        ret = -EINVAL;
        goto errout;
    }

    ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr);
    if (ret < 0)
        goto errout;

    /* After a failed offload the program is released through
     * __cls_bpf_delete_prog() rather than the errout path below.
     */
    ret = cls_bpf_offload(tp, prog, oldprog);
    if (ret) {
        __cls_bpf_delete_prog(prog);
        return ret;
    }

    if (!tc_in_hw(prog->gen_flags))
        prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW;

    if (oldprog) {
        /* Swap the old program out of the list and free it after RCU. */
        list_replace_rcu(&oldprog->link, &prog->link);
        tcf_unbind_filter(tp, &oldprog->res);
        call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu);
    } else {
        list_add_rcu(&prog->link, &head->plist);
    }

    *arg = prog;
    return 0;

errout:
    tcf_exts_destroy(&prog->exts);
    kfree(prog);
    return ret;
}

 bpf action

/* Action operations table for the "bpf" action kind. */
static struct tc_action_ops act_bpf_ops __read_mostly = {
        /* identity */
        .kind           = "bpf",
        .id             = TCA_ID_BPF,
        .owner          = THIS_MODULE,
        .size           = sizeof(struct tcf_bpf),

        /* per-packet execution */
        .act            = tcf_bpf_act,

        /* lifetime of an action instance */
        .init           = tcf_bpf_init,
        .cleanup        = tcf_bpf_cleanup,

        /* lookup, iteration and dumping */
        .lookup         = tcf_bpf_search,
        .walk           = tcf_bpf_walker,
        .dump           = tcf_bpf_dump,
};

/*
 * bpf_init_net - per-namespace setup for the bpf action.
 * @net: network namespace being initialised
 *
 * Passes the namespace's tc_action_net area (found via bpf_net_id) and
 * act_bpf_ops to tc_action_net_init() and returns its result.
 */
static __net_init int bpf_init_net(struct net *net)
{
        return tc_action_net_init(net, net_generic(net, bpf_net_id),
                                  &act_bpf_ops);
}
/*
 * bpf_init_module - module entry point of the bpf action.
 *
 * Registers act_bpf_ops together with bpf_net_ops and returns the result of
 * tcf_register_action().
 */
static int __init bpf_init_module(void)
{
        int err = tcf_register_action(&act_bpf_ops, &bpf_net_ops);

        return err;
}
 

Linux flow offload提高路由转发效率

https://blog.csdn.net/dog250/article/details/103422860