Linux内核中的IPSEC实现(3)

Linux内核中的IPSEC实现(3)
本文档的Copyleft归yfydz所有，使用GPL发布，可以自由拷贝，转载，转载时请保持文档的完整性，严禁用于任何商业用途。
msn: yfydz_no1@hotmail.com
来源：http://yfydz.cublog.cn
5. 安全策略(xfrm_policy)处理

本节所介绍的函数都在net/xfrm/xfrm_policy.c中定义。

5.1 策略分配

策略分配函数为xfrm_policy_alloc(), 该函数被pfkey_spdadd()函数调用

// Allocate a zero-filled struct xfrm_policy using the given gfp flags and
// set up its hash-list nodes, lock, reference count and expiry timer.
// Returns the new policy holding one reference, or NULL if kzalloc() fails.
struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
{
 struct xfrm_policy *xp = kzalloc(sizeof(*xp), gfp);

// Allocation failed: nothing to initialise
 if (!xp)
  return NULL;
// The policy is not linked into either hash chain yet
 INIT_HLIST_NODE(&xp->bydst);
 INIT_HLIST_NODE(&xp->byidx);
 rwlock_init(&xp->lock);
// The caller owns the initial reference
 atomic_set(&xp->refcnt, 1);
// The timer calls xfrm_policy_timer() with the policy itself as its argument
 init_timer(&xp->timer);
 xp->timer.data = (unsigned long)xp;
 xp->timer.function = xfrm_policy_timer;
 return xp;
}
EXPORT_SYMBOL(xfrm_policy_alloc);

定时器函数:
// Timer callback installed on policy->timer; `data` carries the xfrm_policy pointer.
// Evaluates the four configured lifetime limits (hard/soft, counted from add time
// or from last use time). A reached hard limit deletes the policy; a reached soft
// limit only notifies through km_policy_expired(). Otherwise the timer is re-armed
// for the nearest limit. One policy reference is dropped on every exit path.
static void xfrm_policy_timer(unsigned long data)
{
 struct xfrm_policy *xp = (struct xfrm_policy*)data;
 unsigned long now = (unsigned long)xtime.tv_sec;
 long next = LONG_MAX;
 int warn = 0;
 int dir;
// Hold the policy lock for reading while the lifetimes are inspected
 read_lock(&xp->lock);
// The policy is already marked dead: nothing to do
 if (xp->dead)
  goto out;
// Derive the policy direction from its index
 dir = xfrm_policy_id2dir(xp->index);
// Hard limit counted from the time the policy was added
 if (xp->lft.hard_add_expires_seconds) {
// Seconds left until that limit
  long tmo = xp->lft.hard_add_expires_seconds +
   xp->curlft.add_time - now;
// Nothing left: hard expiry
  if (tmo <= 0)
   goto expired;
  if (tmo < next)
   next = tmo;
 }
// Hard limit counted from the last use (or from add time if never used)
 if (xp->lft.hard_use_expires_seconds) {
// Seconds left until that limit
  long tmo = xp->lft.hard_use_expires_seconds +
   (xp->curlft.use_time ? : xp->curlft.add_time) - now;
// Nothing left: hard expiry
  if (tmo <= 0)
   goto expired;
  if (tmo < next)
   next = tmo;
 }
// Soft limit counted from the time the policy was added
 if (xp->lft.soft_add_expires_seconds) {
// Seconds left until that limit
  long tmo = xp->lft.soft_add_expires_seconds +
   xp->curlft.add_time - now;
// Soft limit reached: unlike the hard case, only raise the warning flag
// and check again after XFRM_KM_TIMEOUT
  if (tmo <= 0) {
   warn = 1;
   tmo = XFRM_KM_TIMEOUT;
  }
  if (tmo < next)
   next = tmo;
 }
// Soft limit counted from the last use (or from add time if never used)
 if (xp->lft.soft_use_expires_seconds) {
// Seconds left until that limit
  long tmo = xp->lft.soft_use_expires_seconds +
   (xp->curlft.use_time ? : xp->curlft.add_time) - now;
// Soft limit reached: unlike the hard case, only raise the warning flag
// and check again after XFRM_KM_TIMEOUT
  if (tmo <= 0) {
   warn = 1;
   tmo = XFRM_KM_TIMEOUT;
  }
  if (tmo < next)
   next = tmo;
 }
// A soft limit was reached: notify with third argument 0
// (the hard-expiry path below passes 1)
 if (warn)
  km_policy_expired(xp, dir, 0, 0);
// If any limit is configured, re-arm the timer; when mod_timer() returns 0
// an extra reference is taken on the policy
 if (next != LONG_MAX &&
     !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
  xfrm_pol_hold(xp);
out:
 read_unlock(&xp->lock);
// Drop one reference (presumably the one held on behalf of this timer run)
 xfrm_pol_put(xp);
 return;
expired:
 read_unlock(&xp->lock);
// Hard expiry: delete the policy; notify only if the delete returned 0
 if (!xfrm_policy_delete(xp, dir))
// Third argument 1 marks this as a hard expiry
  km_policy_expired(xp, dir, 1, 0);
 xfrm_pol_put(xp);
}
 
5.2 策略插入

策略插入函数为xfrm_policy_insert(), 该函数被pfkey_spdadd()函数调用, 注意策略链表是按优先权大小进行排序的有序链表, 因此插入策略时要进行优先权比较后插入到合适的位置.

// Insert `policy` into the bydst hash chain selected by its selector/family/dir
// and into the by-index hash. The chain is kept ordered by priority value.
// If an existing policy has the same type, selector and security context it is
// replaced (and killed), unless `excl` is set, in which case -EEXIST is returned.
// After insertion, cached bundles of the policies that follow the new one in
// the chain are detached and freed. Returns 0 on success.
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
 struct xfrm_policy *pol;
 struct xfrm_policy *delpol;
 struct hlist_head *chain;
 struct hlist_node *entry, *newpos, *last;
 struct dst_entry *gc_list;
 write_lock_bh(&xfrm_policy_lock);
// Pick the hash chain for this selector
 chain = policy_hash_bysel(&policy->selector, policy->family, dir);
 delpol = NULL;
 newpos = NULL;
 last = NULL;
// Walk the chain, which is ordered by priority value, to find both a policy
// to replace (if any) and the position where the new policy belongs
 hlist_for_each_entry(pol, entry, chain, bydst) {
// Only look for a duplicate while none has been found yet
  if (!delpol &&
// same policy type
      pol->type == policy->type &&
// same selector (selector_cmp() returns 0 on equality)
      !selector_cmp(&pol->selector, &policy->selector) &&
// matching security context
      xfrm_sec_ctx_match(pol->security, policy->security)) {
// The new policy duplicates an existing one
   if (excl) {
// Exclusive add requested but the policy already exists: fail
    write_unlock_bh(&xfrm_policy_lock);
    return -EEXIST;
   }
// Remember the policy that will be replaced
   delpol = pol;
// The replacement has a larger priority value than the old entry, so it
// belongs further down the ordered chain: keep scanning. delpol is now
// non-NULL, so the duplicate test above cannot match again.
   if (policy->priority > pol->priority)
    continue;
  } else if (policy->priority >= pol->priority) {
// The new priority value is not smaller than this entry's: remember this
// node as the latest candidate predecessor and keep scanning
   last = &pol->bydst;
   continue;
  }
// First node recorded as the insertion anchor according to priority
  if (!newpos)
   newpos = &pol->bydst;
// The policy to replace is already known: stop scanning
  if (delpol)
   break;
  last = &pol->bydst;
 }
 if (!newpos)
  newpos = last;
// Link the new policy into the bydst chain: after the anchor node if there
// is one, otherwise at the head of the chain
 if (newpos)
  hlist_add_after(newpos, &policy->bydst);
 else
  hlist_add_head(&policy->bydst, chain);
// Take a reference on the inserted policy
 xfrm_pol_hold(policy);
// One more policy in this direction
 xfrm_policy_count[dir]++;
 atomic_inc(&flow_cache_genid);
// Unlink the replaced policy from both the bydst and the byidx hash
 if (delpol) {
  hlist_del(&delpol->bydst);
  hlist_del(&delpol->byidx);
  xfrm_policy_count[dir]--;
 }
// Reuse the replaced policy's index, or generate a new one, then link the
// policy into the by-index hash
 policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
 hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
// Record the insertion time and reset the last-use time
 policy->curlft.add_time = (unsigned long)xtime.tv_sec;
 policy->curlft.use_time = 0;
// Arm the lifetime timer one second (HZ jiffies) from now; take a reference
// when mod_timer() returns 0
 if (!mod_timer(&policy->timer, jiffies + HZ))
  xfrm_pol_hold(policy);
 write_unlock_bh(&xfrm_policy_lock);
// Kill the replaced policy; if nothing was replaced, check whether the
// bydst hash table should grow and schedule the resize work
 if (delpol)
  xfrm_policy_kill(delpol);
 else if (xfrm_bydst_should_resize(dir, NULL))
  schedule_work(&xfrm_hash_work);
// Detach the cached bundles of the policies that follow the new one in its chain
 read_lock_bh(&xfrm_policy_lock);
 gc_list = NULL;
 entry = &policy->bydst;
// Continue the walk from the newly inserted node, collecting bundles on gc_list
 hlist_for_each_entry_continue(policy, entry, bydst) {
  struct dst_entry *dst;
  write_lock(&policy->lock);
// Head of this policy's bundle list
  dst = policy->bundles;
  if (dst) {
// Splice the policy's whole bundle list onto the front of gc_list
   struct dst_entry *tail = dst;
   while (tail->next)
    tail = tail->next;
   tail->next = gc_list;
   gc_list = dst;
// The policy no longer owns any bundle
   policy->bundles = NULL;
  }
  write_unlock(&policy->lock);
 }
 read_unlock_bh(&xfrm_policy_lock);
// Free the collected bundles outside the lock
 while (gc_list) {
  struct dst_entry *dst = gc_list;
  gc_list = dst->next;
  dst_free(dst);
 }
 return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);
 
5.3  删除某类型的全部安全策略

该函数被pfkey_spdflush()等函数调用

// Remove every policy of the given type, for each direction below
// XFRM_POLICY_MAX, from the inexact list and from all bydst hash chains.
// xfrm_policy_lock is dropped around each xfrm_policy_kill() call, so the
// current chain is rescanned from its head after every removal.
void xfrm_policy_flush(u8 type)
{
 int dir;
 write_lock_bh(&xfrm_policy_lock);
 for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
  struct xfrm_policy *pol;
  struct hlist_node *entry;
  int i, killed;
  killed = 0;
 again1:
// Scan the inexact list of this direction
  hlist_for_each_entry(pol, entry,
         &xfrm_policy_inexact[dir], bydst) {
// Skip policies of another type
   if (pol->type != type)
    continue;
// Unlink from the bydst list
   hlist_del(&pol->bydst);
// Unlink from the byidx hash
   hlist_del(&pol->byidx);
   write_unlock_bh(&xfrm_policy_lock);
// Mark the policy dead and queue it on the policy garbage list
   xfrm_policy_kill(pol);
   killed++;
   write_lock_bh(&xfrm_policy_lock);
// The lock was released, so restart the scan of this list
   goto again1;
  }
// Scan every bucket of the bydst hash table of this direction
  for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
 again2:
// Scan one bydst hash chain
   hlist_for_each_entry(pol, entry,
          xfrm_policy_bydst[dir].table + i,
          bydst) {
    if (pol->type != type)
     continue;
// Unlink from both hashes
    hlist_del(&pol->bydst);
    hlist_del(&pol->byidx);
    write_unlock_bh(&xfrm_policy_lock);
// Mark the policy dead and queue it for garbage collection
    xfrm_policy_kill(pol);
    killed++;
    write_lock_bh(&xfrm_policy_lock);
// The lock was released, so restart the scan of this chain
    goto again2;
   }
  }
// Account for the policies removed in this direction
  xfrm_policy_count[dir] -= killed;
 }
 atomic_inc(&flow_cache_genid);
 write_unlock_bh(&xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_policy_flush);
 
/* Announce a policy dead and hand it to the policy garbage collector.
 * The policy must already be unlinked from the policy lists, because its
 * bydst node is reused to queue it on xfrm_policy_gc_list.
 * Killing a policy that is already dead triggers WARN_ON and does nothing else.
 */
static void xfrm_policy_kill(struct xfrm_policy *policy)
{
 int was_dead;

// Read the old flag and set the new one inside the same locked section,
// so only one caller can observe the flag as previously clear
 write_lock_bh(&policy->lock);
 was_dead = policy->dead;
 policy->dead = 1;
 write_unlock_bh(&policy->lock);

// Someone already killed this policy: warn and do not queue it twice
 if (unlikely(was_dead)) {
  WARN_ON(1);
  return;
 }

// Queue the policy on the garbage list under the gc lock
 spin_lock(&xfrm_policy_gc_lock);
 hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
 spin_unlock(&xfrm_policy_gc_lock);

// Schedule the policy garbage-collection work item
 schedule_work(&xfrm_policy_gc_work);
}

5.4 策略查找

5.4.1 策略查找并删除

根据选择子和安全上下文查找策略, 可查找策略并删除, 被pfkey_spddelete()函数调用

// Look up a policy by type, selector and security context in the bydst hash
// chain chosen from `sel` and `dir`. When `delete` is non-zero the match is
// also unlinked from both hashes and killed.
// Returns the matching policy with an extra reference held, or NULL.
struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
       struct xfrm_selector *sel,
       struct xfrm_sec_ctx *ctx, int delete)
{
 struct xfrm_policy *pol, *ret;
 struct hlist_head *chain;
 struct hlist_node *entry;
 write_lock_bh(&xfrm_policy_lock);
// Locate the hash chain
 chain = policy_hash_bysel(sel, sel->family, dir);
 ret = NULL;
// Walk the chain
 hlist_for_each_entry(pol, entry, chain, bydst) {
// Match on type, selector and security context
  if (pol->type == type &&
      !selector_cmp(sel, &pol->selector) &&
      xfrm_sec_ctx_match(ctx, pol->security)) {
   xfrm_pol_hold(pol);
   if (delete) {
// Deletion requested: unlink from the bydst and byidx hashes
    hlist_del(&pol->bydst);
    hlist_del(&pol->byidx);
    xfrm_policy_count[dir]--;
   }
   ret = pol;
   break;
  }
 }
 write_unlock_bh(&xfrm_policy_lock);
 if (ret && delete) {
// Bump the flow cache generation id
  atomic_inc(&flow_cache_genid);
// Mark the policy dead and queue it on the policy garbage list
  xfrm_policy_kill(ret);
 }
 return ret;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);

5.4.2 按索引号查找并删除

// Look up a policy by type and index in the by-index hash. When `delete` is
// non-zero the match is also unlinked from both hashes (decrementing the
// count for `dir`) and killed.
// Returns the matching policy with an extra reference held, or NULL.
struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete)
{
 struct xfrm_policy *pol, *ret;
 struct hlist_head *chain;
 struct hlist_node *entry;
 write_lock_bh(&xfrm_policy_lock);
// Locate the hash chain from the index
 chain = xfrm_policy_byidx + idx_hash(id);
 ret = NULL;
// Walk the chain
 hlist_for_each_entry(pol, entry, chain, byidx) {
// Both type and index must be equal
  if (pol->type == type && pol->index == id) {
   xfrm_pol_hold(pol);
// Deletion requested: unlink the policy from both hashes
   if (delete) {
    hlist_del(&pol->bydst);
    hlist_del(&pol->byidx);
    xfrm_policy_count[dir]--;
   }
   ret = pol;
   break;
  }
 }
 write_unlock_bh(&xfrm_policy_lock);
 if (ret && delete) {
// Bump the flow cache generation id
  atomic_inc(&flow_cache_genid);
// Mark the policy dead and queue it on the policy garbage list
  xfrm_policy_kill(ret);
 }
 return ret;
}
EXPORT_SYMBOL(xfrm_policy_byid);

5.4.3 根据路由查找策略

// Resolver passed to flow_cache_lookup(): find the policy for flow `fl`.
// With CONFIG_XFRM_SUB_POLICY a SUB-type policy is tried first and the
// MAIN-type lookup runs only if that produced neither a policy nor an error.
// Returns 0 or a negative error code. The policy (or NULL) is stored in
// *objp; when a policy is found, *obj_refp is set to its reference counter.
static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
          void **objp, atomic_t **obj_refp)
{
 struct xfrm_policy *found;
 int rc = 0;

#ifdef CONFIG_XFRM_SUB_POLICY
// Sub-policy lookup (a Linux extension)
 found = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir);
 if (IS_ERR(found)) {
  rc = PTR_ERR(found);
  found = NULL;
 }
// Fall through to the MAIN lookup only when SUB gave nothing and no error
 if (!found && !rc)
#endif
 {
// Main policy lookup
  found = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir);
  if (IS_ERR(found)) {
   rc = PTR_ERR(found);
   found = NULL;
  }
 }

// Hand the result back through the output parameters
 *objp = (void *) found;
 if (found != NULL)
  *obj_refp = &found->refcnt;
 return rc;
}

// Find the policy of the given type that matches flow `fl`.
// The address-hashed chain is searched first, then the inexact list; an
// inexact match wins only if its priority value is smaller.
// Returns the policy with a reference held, NULL if nothing matched (or the
// flow has no addresses), or an ERR_PTR() if xfrm_policy_match() reported an
// error other than -ESRCH.
static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
           u16 family, u8 dir)
{
 int err;
 struct xfrm_policy *pol, *ret;
 xfrm_address_t *daddr, *saddr;
 struct hlist_node *entry;
 struct hlist_head *chain;
 u32 priority = ~0U;
// Destination and source address of the flow
 daddr = xfrm_flowi_daddr(fl, family);
 saddr = xfrm_flowi_saddr(fl, family);
 if (unlikely(!daddr || !saddr))
  return NULL;
 read_lock_bh(&xfrm_policy_lock);
// Pick the hash chain from the addresses
 chain = policy_hash_direct(daddr, saddr, family, dir);
 ret = NULL;
// Walk the hash chain
 hlist_for_each_entry(pol, entry, chain, bydst) {
// 0 means the policy matches the flow, type and family
  err = xfrm_policy_match(pol, fl, type, family, dir);
  if (err) {
// -ESRCH is "no match": try the next entry; anything else aborts the lookup
   if (err == -ESRCH)
    continue;
   else {
    ret = ERR_PTR(err);
    goto fail;
   }
  } else {
// Remember the first match and its priority value
   ret = pol;
   priority = ret->priority;
   break;
  }
 }
// Now search the inexact list; a match there replaces the one found above
// only if its priority value is smaller
 chain = &xfrm_policy_inexact[dir];
// Walk the inexact list
 hlist_for_each_entry(pol, entry, chain, bydst) {
// 0 means the policy matches the flow, type and family
  err = xfrm_policy_match(pol, fl, type, family, dir);
  if (err) {
   if (err == -ESRCH)
    continue;
   else {
    ret = ERR_PTR(err);
    goto fail;
   }
  } else if (pol->priority < priority) {
// Smaller priority value: prefer this policy
   ret = pol;
   break;
  }
 }
// Take a reference on the result before the lock is dropped
 if (ret)
  xfrm_pol_hold(ret);
fail:
 read_unlock_bh(&xfrm_policy_lock);
 return ret;
}
// Test whether policy `pol` applies to flow `fl`.
// Returns -ESRCH when the family, type or selector does not match; otherwise
// returns whatever security_xfrm_policy_lookup() returns (0 meaning match).
static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl,
        u8 type, u16 family, int dir)
{
// Family and type must both agree
 if (pol->family != family || pol->type != type)
  return -ESRCH;
// The selector must match the flow (non-zero result means match)
 if (!xfrm_selector_match(&pol->selector, fl, family))
  return -ESRCH;
// The security hook has the final say
 return security_xfrm_policy_lookup(pol, fl->secid, dir);
}
// Match a selector against a flow, dispatching on the address family.
// Returns the family-specific result for AF_INET / AF_INET6 and 0 for any
// other family.
static inline int
xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
      unsigned short family)
{
 if (family == AF_INET)
  return __xfrm4_selector_match(sel, fl);
 if (family == AF_INET6)
  return __xfrm6_selector_match(sel, fl);
// Unsupported family never matches
 return 0;
}
// IPv4 selector match. Returns 1 when the flow's destination and source
// addresses, ports, protocol and outgoing interface all satisfy the selector,
// 0 otherwise. A zero selector proto or ifindex acts as a wildcard.
static inline int
__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
{
// Destination address under the destination prefix length
 if (!addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d))
  return 0;
// Source address under the source prefix length
 if (!addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s))
  return 0;
// Ports are compared only on the bits set in the selector masks
 if ((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask)
  return 0;
 if ((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask)
  return 0;
// Protocol, unless the selector leaves it as 0
 if (sel->proto && fl->proto != sel->proto)
  return 0;
// Outgoing interface index, unless the selector leaves it as 0
 if (sel->ifindex && fl->oif != sel->ifindex)
  return 0;
 return 1;
}
// IPv6 selector match. Returns 1 when the flow's destination and source
// addresses, ports, protocol and outgoing interface all satisfy the selector,
// 0 otherwise. A zero selector proto or ifindex acts as a wildcard.
static inline int
__xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl)
{
// Destination address under the destination prefix length
 if (!addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d))
  return 0;
// Source address under the source prefix length
 if (!addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s))
  return 0;
// Ports are compared only on the bits set in the selector masks
 if ((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask)
  return 0;
 if ((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask)
  return 0;
// Protocol, unless the selector leaves it as 0
 if (sel->proto && fl->proto != sel->proto)
  return 0;
// Outgoing interface index, unless the selector leaves it as 0
 if (sel->ifindex && fl->oif != sel->ifindex)
  return 0;
 return 1;
}

5.4.4 查找和sock对应的策略
// Look up the per-socket policy sk->sk_policy[dir] for flow `fl`.
// Returns the policy with a reference held when its selector matches and the
// security hook returns 0; NULL when the socket has no policy for `dir`, the
// selector does not match, or the hook returns -ESRCH; ERR_PTR(err) for any
// other hook error.
static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
{
 struct xfrm_policy *result;
 int rc;

 read_lock_bh(&xfrm_policy_lock);
// Policy attached to the socket for this direction, if any
 result = sk->sk_policy[dir];
 if (result == NULL)
  goto unlock;
// The policy's selector must match the flow
 if (!xfrm_selector_match(&result->selector, fl, sk->sk_family)) {
  result = NULL;
  goto unlock;
 }
// Let the security hook accept or veto the policy
 rc = security_xfrm_policy_lookup(result, fl->secid,
   policy_to_flow_dir(dir));
 if (rc == 0)
  xfrm_pol_hold(result);
 else if (rc == -ESRCH)
  result = NULL;
 else
  result = ERR_PTR(rc);
unlock:
 read_unlock_bh(&xfrm_policy_lock);
 return result;
}

5.5 遍历安全策略

该函数被pfkey_spddump()等函数调用

// Call `func` on every policy of the given type.
// The tables are traversed twice under xfrm_policy_lock: the first pass counts
// the matching policies, the second calls
// func(pol, dir % XFRM_POLICY_MAX, seq, data) where seq counts down from
// count-1 to 0, so 0 identifies the last policy delivered.
// Returns -ENOENT if no policy matches, the first non-zero value returned by
// func, or 0.
int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*),
       void *data)
{
 struct xfrm_policy *pol;
 struct hlist_node *entry;
 int dir, count, error;
 read_lock_bh(&xfrm_policy_lock);
 count = 0;
// Pass 1: count the policies of this type over all 2*XFRM_POLICY_MAX slots
 for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
  struct hlist_head *table = xfrm_policy_bydst[dir].table;
  int i;
// inexact list
  hlist_for_each_entry(pol, entry,
         &xfrm_policy_inexact[dir], bydst) {
   if (pol->type == type)
    count++;
  }
// every bucket of the address-hashed table
  for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
// one hash chain
   hlist_for_each_entry(pol, entry, table + i, bydst) {
    if (pol->type == type)
     count++;
   }
  }
 }
 if (count == 0) {
  error = -ENOENT;
  goto out;
 }
// Pass 2: walk the tables again; the decremented count is handed to func as
// the policy's sequence number, so the callback sees decreasing numbers
 for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
  struct hlist_head *table = xfrm_policy_bydst[dir].table;
  int i;
// inexact list
  hlist_for_each_entry(pol, entry,
         &xfrm_policy_inexact[dir], bydst) {
   if (pol->type != type)
    continue;
// Invoke the callback on each policy of the requested type
   error = func(pol, dir % XFRM_POLICY_MAX, --count, data);
   if (error)
    goto out;
  }
// every bucket of the address-hashed table
  for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
   hlist_for_each_entry(pol, entry, table + i, bydst) {
    if (pol->type != type)
     continue;
// Invoke the callback; a sequence number of 0 marks the last policy
    error = func(pol, dir % XFRM_POLICY_MAX, --count, data);
    if (error)
     goto out;
   }
  }
 }
 error = 0;
out:
 read_unlock_bh(&xfrm_policy_lock);
 return error;
}
EXPORT_SYMBOL(xfrm_policy_walk);
 
5.5 策略检查

__xfrm_policy_check函数也是一个比较重要的函数, 被xfrm_policy_check()调用, 又被xfrm4_policy_check()和xfrm6_policy_check()调用, 而这两个函数在网络层的输入和转发处调用.
对普通包就返回合法, 对IPSEC包检查策略是否合法, 是否和路由方向匹配

// Check an incoming/forwarded packet against the security policy database.
//   sk     - socket the packet belongs to, may be NULL
//   dir    - policy direction, used to index sk->sk_policy[]
//   skb    - the packet; skb->sp is its security path (SAs already applied)
//   family - address family used to decode the flow
// Returns 1 if the packet is acceptable and 0 if it must be rejected.
// Every policy reference obtained here is released on all exit paths that
// are reached after a policy has been found.
int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
   unsigned short family)
{
 struct xfrm_policy *pol;
 struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
 int npols = 0;
 int xfrm_nr;
 int pi;
 struct flowi fl;
// Convert the policy direction into a flow direction
 u8 fl_dir = policy_to_flow_dir(dir);
 int xerr_idx = -1;
// Fill the flow structure from the packet (family-specific decoding)
 if (xfrm_decode_session(skb, &fl, family) < 0)
  return 0;
// Let the NAT code adjust the decoded flow
 nf_nat_decode_session(skb, &fl, family);
 /* First, check used SA against their selectors. */
 if (skb->sp) {
// The packet carries a security path: it has been through IPsec processing
  int i;
  for (i=skb->sp->len-1; i>=0; i--) {
// SA recorded at this position of the security path
   struct xfrm_state *x = skb->sp->xvec[i];
// The SA's selector must match the flow; otherwise reject
   if (!xfrm_selector_match(&x->sel, &fl, family))
    return 0;
  }
 }
 pol = NULL;
// A policy is attached to the socket for this direction
 if (sk && sk->sk_policy[dir]) {
// Returns the policy (with a reference) if it matches the flow
  pol = xfrm_sk_policy_lookup(sk, dir, &fl);
  if (IS_ERR(pol))
   return 0;
 }
// No socket policy: look the flow up through the flow cache, with
// xfrm_policy_lookup() as the resolver
 if (!pol)
  pol = flow_cache_lookup(&fl, family, fl_dir,
     xfrm_policy_lookup);
// The lookup failed
 if (IS_ERR(pol))
  return 0;
// No policy applies to this flow
 if (!pol) {
// A packet whose security path contains a non-transport-mode SA is rejected
// when no policy covers it
  if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
   xfrm_secpath_reject(xerr_idx, skb, &fl);
   return 0;
  }
// Plain packet without policy: accept
  return 1;
 }
// A policy was found: record its last-use time
 pol->curlft.use_time = (unsigned long)xtime.tv_sec;
 pols[0] = pol;
 npols ++;
#ifdef CONFIG_XFRM_SUB_POLICY
// The policy found is not of MAIN type: additionally look up the MAIN policy
 if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
  pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
          &fl, family,
          XFRM_POLICY_IN);
  if (pols[1]) {
// Fix: the original returned 0 directly here and leaked the reference held
// on pols[0]; npols is still 1, so reject_error releases exactly pols[0]
   if (IS_ERR(pols[1]))
    goto reject_error;
   pols[1]->curlft.use_time = (unsigned long)xtime.tv_sec;
   npols ++;
  }
 }
#endif
// The policy allows the traffic: verify the security path against its templates
 if (pol->action == XFRM_POLICY_ALLOW) {
  struct sec_path *sp;
// Empty stand-in security path for packets that have none
  static struct sec_path dummy;
  struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
  struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
  struct xfrm_tmpl **tpp = tp;
  int ti = 0;
  int i, k;
// Use the stand-in when the packet has no security path
  if ((sp = skb->sp) == NULL)
   sp = &dummy;
// Collect the templates of all policies found (usually just one policy)
  for (pi = 0; pi < npols; pi++) {
// Any additional policy must also allow the traffic
   if (pols[pi] != pol &&
       pols[pi]->action != XFRM_POLICY_ALLOW)
    goto reject;
// Too many templates to hold
   if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH)
    goto reject_error;
// Record pointers to this policy's templates; ti counts them
   for (i = 0; i < pols[pi]->xfrm_nr; i++)
    tpp[ti++] = &pols[pi]->xfrm_vec[i];
  }
// Total number of templates
  xfrm_nr = ti;
  if (npols > 1) {
// More than one policy: sort the templates into stp; the return value of
// xfrm_tmpl_sort() is not checked here
   xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
   tpp = stp;
  }
  /* For each tunnel xfrm, find the first matching tmpl.
   * For each tmpl before that, find corresponding xfrm.
   * Order is _important_. Later we will implement
   * some barriers, but at the moment barriers
   * are implied between each two transformations.
   */
// Check the templates from last to first against the security path
  for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
// k is both input (where to start in the security path) and output;
// a result >= 0 means this template is satisfied
   k = xfrm_policy_ok(tpp[i], sp, k, family);
   if (k < 0) {
    if (k < -1)
     /* "-2 - errored_index" returned */
     xerr_idx = -(2+k);
    goto reject;
   }
  }
// Reject if secpath_has_nontransport() reports a non-transport entry in the
// security path from position k onward
  if (secpath_has_nontransport(sp, k, &xerr_idx))
   goto reject;
  xfrm_pols_put(pols, npols);
  return 1;
 }
// Rejected: report the offending security path entry, drop the policy
// references and return 0
reject:
 xfrm_secpath_reject(xerr_idx, skb, &fl);
reject_error:
 xfrm_pols_put(pols, npols);
 return 0;
}
EXPORT_SYMBOL(__xfrm_policy_check);
 

/*
 * 0 or more than 0 is returned when validation is succeeded (either bypass
 * because of optional transport mode, or next index of the matched secpath
 * state with the template.
 * -1 is returned when no matching template is found.
 * Otherwise "-2 - errored_index" is returned.
 */
// tmpl   - policy template to satisfy
// sp     - security path of the packet
// start  - index in sp->xvec at which to begin searching
// family - address family passed to xfrm_state_ok()
static inline int
xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
        unsigned short family)
{
 int idx = start;
 if (tmpl->optional) {
// Optional transport-mode template: accept without consuming a secpath entry
  if (tmpl->mode == XFRM_MODE_TRANSPORT)
   return start;
 } else
// Mandatory template: the result stays -1 unless a match is found
  start = -1;
 for (; idx < sp->len; idx++) {
// sp->xvec[] holds the xfrm states (SAs) of the security path.
// On a match, return the index just past the matching state
  if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
   return ++idx;
// A non-matching SA that is not in transport mode ends the search; for a
// mandatory template its index is encoded into the error value
  if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
   if (start == -1)
    start = -2-idx;
   break;
  }
 }
 return start;
}

5.6 安全策略路由查找
 
xfrm_lookup函数是个非常重要的函数, 用来根据安全策略构造数据包的路由项链表, 该路由项链表反映了对数据包进行IPSEC封装的多层次的处理, 每封装一次, 就增加一个路由项.
该函数被路由查找函数ip_route_output_flow()调用, 针对的是转发或发出的数据包.

/* Main function: finds/creates a bundle for given flow.
 *
 * At the moment we eat a raw IP route. Mostly to speed up lookups
 * on interfaces with disabled IPsec.
 */
//   dst_p - in: the plain route; out: the bundle to use instead (unchanged
//           when no transformation applies, NULL on error)
//   fl    - flow being routed
//   sk    - originating socket, may be NULL
//   flags - non-zero allows sleeping while the key manager negotiates SAs
// Returns 0 on success and a negative error code on failure. On every
// failure path the original route is released and *dst_p is set to NULL.
int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
  struct sock *sk, int flags)
{
 struct xfrm_policy *policy;
 struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
 int npols;
 int pol_dead;
 int xfrm_nr;
 int pi;
 struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
 struct dst_entry *dst, *dst_orig = *dst_p;
 int nx = 0;
 int err;
 u32 genid;
 u16 family;
 u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
restart:
// (Re)initialise the per-attempt state
 genid = atomic_read(&flow_cache_genid);
 policy = NULL;
 for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
  pols[pi] = NULL;
 npols = 0;
 pol_dead = 0;
 xfrm_nr = 0;
 if (sk && sk->sk_policy[1]) {
// The socket has its own policy in slot 1 (presumably XFRM_POLICY_OUT):
// look it up for this flow
  policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
  if (IS_ERR(policy)) {
// Fix: the original returned here without releasing dst_orig, unlike the
// error: path
   err = PTR_ERR(policy);
   goto dropdst;
  }
 }
 if (!policy) {
// No socket policy applies
  /* To accelerate a bit...  */
// The route is flagged DST_NOXFRM, or there is no output policy at all:
// keep the plain route
  if ((dst_orig->flags & DST_NOXFRM) ||
      !xfrm_policy_count[XFRM_POLICY_OUT])
   return 0;
// Look the flow up through the flow cache, with xfrm_policy_lookup() as
// the resolver
  policy = flow_cache_lookup(fl, dst_orig->ops->family,
        dir, xfrm_policy_lookup);
  if (IS_ERR(policy)) {
// Fix: release dst_orig on this error path as well
   err = PTR_ERR(policy);
   goto dropdst;
  }
 }
// No policy: ordinary packet, ordinary route
 if (!policy)
  return 0;
// From here on a policy applies and a bundle has to be found or built.
// Address family of the original route
 family = dst_orig->ops->family;
// Record the policy's last-use time
 policy->curlft.use_time = (unsigned long)xtime.tv_sec;
// The policy found becomes the first entry of the policy array
 pols[0] = policy;
 npols ++;
 xfrm_nr += pols[0]->xfrm_nr;
// Act on the policy's action: block or allow
 switch (policy->action) {
 case XFRM_POLICY_BLOCK:
  /* Prohibit the flow */
  err = -EPERM;
  goto error;
 case XFRM_POLICY_ALLOW:
#ifndef CONFIG_XFRM_SUB_POLICY
// Without sub-policies an empty template list means "pass untransformed"
  if (policy->xfrm_nr == 0) {
   /* Flow passes not transformed. */
   xfrm_pol_put(policy);
   return 0;
  }
#endif
  /* Try to find matching bundle.
   *
   * LATER: help from flow cache. It is optional, this
   * is required only for output policy.
   */
// Look for a bundle already cached on the policy
  dst = xfrm_find_bundle(fl, policy, family);
  if (IS_ERR(dst)) {
   err = PTR_ERR(dst);
   goto error;
  }
// A cached bundle exists: use it
  if (dst)
   break;
#ifdef CONFIG_XFRM_SUB_POLICY
// The policy found is not of MAIN type: additionally look up the MAIN policy
  if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
   pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
           fl, family,
           XFRM_POLICY_OUT);
   if (pols[1]) {
    if (IS_ERR(pols[1])) {
     err = PTR_ERR(pols[1]);
     goto error;
    }
// Fix: count pols[1] before the BLOCK check. The lookup returned it with a
// reference held, and the original jumped to error: with npols still 1, so
// that reference was never released.
    npols ++;
    if (pols[1]->action == XFRM_POLICY_BLOCK) {
     err = -EPERM;
     goto error;
    }
    xfrm_nr += pols[1]->xfrm_nr;
   }
  }
  /*
   * Because neither flowi nor bundle information knows about
   * transformation template size. On more than one policy usage
   * we can realize whether all of them is bypass or not after
   * they are searched. See above not-transformed bypass
   * is surrounded by non-sub policy configuration, too.
   */
  if (xfrm_nr == 0) {
   /* Flow passes not transformed. */
   xfrm_pols_put(pols, npols);
   return 0;
  }
#endif
// No cached bundle: resolve the policy templates into SAs (xfrm_state)
// stored in xfrm[]; nx is the number of SAs
  nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
  if (unlikely(nx<0)) {
// Resolution failed. -EAGAIN is handled by waiting on km_waitq and retrying
// (presumably an SA negotiation is in progress in that case)
   err = nx;
   if (err == -EAGAIN && flags) {
// Sleep interruptibly on km_waitq
    DECLARE_WAITQUEUE(wait, current);
    add_wait_queue(&km_waitq, &wait);
    set_current_state(TASK_INTERRUPTIBLE);
    schedule();
    set_current_state(TASK_RUNNING);
    remove_wait_queue(&km_waitq, &wait);
// Woken up: try to resolve the SAs again
    nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
    if (nx == -EAGAIN && signal_pending(current)) {
     err = -ERESTART;
     goto error;
    }
// Still not resolved, or the policy database changed meanwhile: start over
    if (nx == -EAGAIN ||
        genid != atomic_read(&flow_cache_genid)) {
     xfrm_pols_put(pols, npols);
     goto restart;
    }
    err = nx;
   }
   if (err < 0)
    goto error;
  }
  if (nx == 0) {
// No SA required: the flow is not transformed
   /* Flow passes not transformed. */
   xfrm_pols_put(pols, npols);
   return 0;
  }
// Start from the original route
  dst = dst_orig;
// Build the bundle; 0 means success and dst then refers to the new bundle.
// The construction itself is address-family specific.
  err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family);
  if (unlikely(err)) {
// Creation failed: release the SAs obtained above
   int i;
   for (i=0; i<nx; i++)
    xfrm_state_put(xfrm[i]);
   goto error;
  }
// Collect the dead flag of every policy involved
  for (pi = 0; pi < npols; pi++) {
   read_lock_bh(&pols[pi]->lock);
   pol_dead |= pols[pi]->dead;
   read_unlock_bh(&pols[pi]->lock);
  }
  write_lock_bh(&policy->lock);
// A policy died meanwhile, or the new bundle is already stale: discard it
  if (unlikely(pol_dead || stale_bundle(dst))) {
   /* Wow! While we worked on resolving, this
    * policy has gone. Retry. It is not paranoia,
    * we just cannot enlist new bundle to dead object.
    * We can't enlist stable bundles either.
    */
   write_unlock_bh(&policy->lock);
   if (dst)
    dst_free(dst);
   err = -EHOSTUNREACH;
   goto error;
  }
// Push the bundle onto the head of the policy's bundle list
// (a singly linked list through dst->next)
  dst->next = policy->bundles;
  policy->bundles = dst;
  dst_hold(dst);
  write_unlock_bh(&policy->lock);
 }
// Return the bundle to the caller in place of the original route
 *dst_p = dst;
 dst_release(dst_orig);
 xfrm_pols_put(pols, npols);
 return 0;
error:
 xfrm_pols_put(pols, npols);
dropdst:
// Common failure exit: release the original route and clear the result
 dst_release(dst_orig);
 *dst_p = NULL;
 return err;
}
EXPORT_SYMBOL(xfrm_lookup);
以下是在xfrm_lookup中用到的两个bundle的操作函数: 查找和创建, 由于使用了地址参数, 是和协议族相关的, 因此具体实现是在各协议族中实现的, 在后续文章中介绍协议族中的xfrm实现时再详细介绍.
// Ask the address-family backend for a cached bundle matching the flow and
// policy. Returns whatever afinfo->find_bundle() returns, or
// ERR_PTR(-EINVAL) when no backend is available for `family`.
static struct dst_entry *
xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
{
 struct dst_entry *bundle = ERR_PTR(-EINVAL);
 struct xfrm_policy_afinfo *afinfo;

 afinfo = xfrm_policy_get_afinfo(family);
 if (unlikely(afinfo == NULL))
  goto out;
// Delegate to the family-specific lookup, then release the afinfo
 bundle = afinfo->find_bundle(fl, policy);
 xfrm_policy_put_afinfo(afinfo);
out:
 return bundle;
}
/* Build a bundle (chain of dst_entry's with the given xfrm states attached)
 * by delegating to the address-family backend.
 * Returns the backend's result, or -EINVAL when no backend is available
 * for `family`.
 */
static int
xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
     struct flowi *fl, struct dst_entry **dst_p,
     unsigned short family)
{
 int rc = -EINVAL;
 struct xfrm_policy_afinfo *afinfo;

 afinfo = xfrm_policy_get_afinfo(family);
 if (unlikely(afinfo == NULL))
  goto out;
// Delegate to the family-specific constructor, then release the afinfo
 rc = afinfo->bundle_create(policy, xfrm, nx, fl, dst_p);
 xfrm_policy_put_afinfo(afinfo);
out:
 return rc;
}

// Resolve the templates of all `npols` policies into xfrm states (SAs).
// Returns the number of states stored in `xfrm`, or a negative error code;
// on failure every state obtained so far is released.
static int
xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
    struct xfrm_state **xfrm,
    unsigned short family)
{
 struct xfrm_state *tp[XFRM_MAX_DEPTH];
// With more than one policy the states are first collected in the local tp[]
// array; xfrm_state_sort() below is then called with xfrm as its first
// argument and tpp as its second (presumably writing the sorted result into
// the caller's array). With a single policy they go straight into xfrm.
 struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
 int cnx = 0;
 int error;
 int ret;
 int i;
// One iteration per policy; usually npols is 1
 for (i = 0; i < npols; i++) {
// Make sure the state array can still hold this policy's templates
  if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
   error = -ENOBUFS;
   goto fail;
  }
// Resolve the templates of one policy
  ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
  if (ret < 0) {
   error = ret;
   goto fail;
  } else
   cnx += ret;
 }
 /* found states are sorted for outbound processing */
// Only needed when several policies contributed states
 if (npols > 1)
  xfrm_state_sort(xfrm, tpp, cnx, family);
 return cnx;
 fail:
// Release the states collected before the failure
 for (cnx--; cnx>=0; cnx--)
  xfrm_state_put(tpp[cnx]);
 return error;
}
/* Resolve list of templates for the flow, given policy. */
// Stores the valid states found into xfrm[] and returns their number.
// If a non-optional template cannot be resolved, the states collected so far
// are released and the error code is returned.
static int
xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
        struct xfrm_state **xfrm,
        unsigned short family)
{
 int nx;
 int i, error;
// Addresses of the flow
 xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
 xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
 xfrm_address_t tmp;
// One iteration per template of the policy
 for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
  struct xfrm_state *x;
  xfrm_address_t *remote = daddr;
  xfrm_address_t *local  = saddr;
  struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
  if (tmpl->mode == XFRM_MODE_TUNNEL) {
// Tunnel mode: use the endpoint addresses from the template instead of the
// current addresses
   remote = &tmpl->id.daddr;
   local = &tmpl->saddr;
// The template leaves the local address unspecified: obtain a source address
// for reaching `remote` (family-specific)
   if (xfrm_addr_any(local, family)) {
    error = xfrm_get_saddr(&tmp, remote, family);
    if (error)
     goto fail;
    local = &tmp;
   }
  }
// Find a state for these addresses, flow, template and policy; `error` is
// passed by pointer so the lookup can report a failure reason
  x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
  if (x && x->km.state == XFRM_STATE_VALID) {
// Usable state: store it; the following templates start from its addresses
   xfrm[nx++] = x;
   daddr = remote;
   saddr = local;
   continue;
  }
  if (x) {
// A state exists but is not valid: -EINVAL if it is in the error state,
// otherwise -EAGAIN (presumably still being negotiated)
   error = (x->km.state == XFRM_STATE_ERROR ?
     -EINVAL : -EAGAIN);
   xfrm_state_put(x);
  }
// An unresolved template is fatal unless it is optional
  if (!tmpl->optional)
   goto fail;
 }
 return nx;
fail:
// Release the states collected before the failure
 for (nx--; nx>=0; nx--)
  xfrm_state_put(xfrm[nx]);
 return error;
}

路由处理过程将在后面介绍IPSEC包的发出过程时说明, 届时可以进一步了解安全路由的作用.
 
5.6 变更HASH表大小

改变策略HASH表的大小是通过工作队列来实现的, 和xfrm_state类似

工作定义:
// Work item whose handler is xfrm_hash_resize(); it is queued with
// schedule_work(&xfrm_hash_work) when a policy hash table should grow
static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);

// Handler of the xfrm_hash_work work item: grow whichever policy hash tables
// need it. Runs entirely under hash_resize_mutex; the argument is unused.
static void xfrm_hash_resize(void *__unused)
{
 int policies = 0;
 int d = 0;

 mutex_lock(&hash_resize_mutex);
// Check the bydst table of each of the XFRM_POLICY_MAX * 2 slots;
// `policies` accumulates the policy count over all of them
 while (d < XFRM_POLICY_MAX * 2) {
  if (xfrm_bydst_should_resize(d, &policies))
   xfrm_bydst_resize(d);
  d++;
 }
// The by-index table is judged against the accumulated total
 if (xfrm_byidx_should_resize(policies))
  xfrm_byidx_resize(policies);
 mutex_unlock(&hash_resize_mutex);
}
// Decide whether the bydst hash table of slot `dir` should grow.
// When `total` is non-NULL the slot's policy count is added to *total.
// Returns 1 if the table is still below xfrm_policy_hashmax buckets and holds
// more policies than its hash mask, otherwise 0.
static inline int xfrm_bydst_should_resize(int dir, int *total)
{
 unsigned int mask = xfrm_policy_bydst[dir].hmask;
 unsigned int npolicies = xfrm_policy_count[dir];

// Accumulate the running total for the caller, if requested
 if (total)
  *total += npolicies;
// Table already at its maximum size: never grow
 if ((mask + 1) >= xfrm_policy_hashmax)
  return 0;
// Grow only when the policies outnumber the mask
 return npolicies > mask ? 1 : 0;
}
// Decide whether the by-index hash table should grow.
// Returns 1 if the table is still below xfrm_policy_hashmax buckets and
// `total` exceeds its current hash mask, otherwise 0.
static inline int xfrm_byidx_should_resize(int total)
{
 unsigned int hmask = xfrm_idx_hmask;

// Table already at its maximum size: never grow
 if ((hmask + 1) >= xfrm_policy_hashmax)
  return 0;
// Grow only when the total number of policies exceeds the mask
 return total > hmask ? 1 : 0;
}

// Grow the bydst hash table of slot `dir`: allocate a larger table, move every
// chain over under xfrm_policy_lock, publish the new table and free the old
// one. Does nothing if the allocation fails.
static void xfrm_bydst_resize(int dir)
{
// Current hash mask of this slot
 unsigned int hmask = xfrm_policy_bydst[dir].hmask;
// Hash mask of the new, larger table
 unsigned int nhashmask = xfrm_new_hash_mask(hmask);
// Size of the new table in bytes
 unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
// Old table
 struct hlist_head *odst = xfrm_policy_bydst[dir].table;
// New table
 struct hlist_head *ndst = xfrm_hash_alloc(nsize);
 int i;
// The new table could not be allocated: keep the old one
 if (!ndst)
  return;
 write_lock_bh(&xfrm_policy_lock);
// Move the policies of every old bucket into the new table
 for (i = hmask; i >= 0; i--)
  xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);
// Publish the new table and mask
 xfrm_policy_bydst[dir].table = ndst;
 xfrm_policy_bydst[dir].hmask = nhashmask;
 write_unlock_bh(&xfrm_policy_lock);
// Free the old table
 xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
}

// Grow the by-index policy hash table; same procedure as the by-destination
// resize. The total argument is not used in the body.
static void xfrm_byidx_resize(int total)
{
// Current index mask and the new, larger mask
 unsigned int hmask = xfrm_idx_hmask;
 unsigned int nhashmask = xfrm_new_hash_mask(hmask);
// Byte size of the new table: one hlist_head per bucket
 unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
// Old and new tables
 struct hlist_head *oidx = xfrm_policy_byidx;
 struct hlist_head *nidx = xfrm_hash_alloc(nsize);
 int i;
// Allocation failed: keep the old table
 if (!nidx)
  return;
 write_lock_bh(&xfrm_policy_lock);
// Transfer every old bucket into the new table
 for (i = hmask; i >= 0; i--)
  xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
// Publish the new table and mask
 xfrm_policy_byidx = nidx;
 xfrm_idx_hmask = nhashmask;
 write_unlock_bh(&xfrm_policy_lock);
// Free the old table; its size is derived from the old mask
 xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
}
 
5.7 垃圾搜集
 
垃圾搜集的是不用的安全路由项, 是和协议族相关的
afinfo->garbage_collect = __xfrm_garbage_collect;

// Garbage-collection hook: a wrapper around xfrm_prune_bundles() that uses
// unused_bundle() as the selection predicate.
static void __xfrm_garbage_collect(void)
{
 xfrm_prune_bundles(unused_bundle);
}

// Prune bundles (the article's "security routes") from all policies.
// func is a predicate: a bundle for which it returns non-zero is unlinked from
// its policy and collected. The collected bundles are freed with dst_free()
// after xfrm_policy_lock has been released.
static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
{
// Head of the list of bundles collected for freeing
 struct dst_entry *gc_list = NULL;
 int dir;
 read_lock_bh(&xfrm_policy_lock);
// Loop over every direction slot
 for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
  struct xfrm_policy *pol;
  struct hlist_node *entry;
  struct hlist_head *table;
  int i;
// Walk the inexact policy list of this slot
  hlist_for_each_entry(pol, entry,
         &xfrm_policy_inexact[dir], bydst)
// Move the policy's matching bundles onto the garbage list
   prune_one_bundle(pol, func, &gc_list);

// Walk every bucket of the by-destination hash table of this slot
  table = xfrm_policy_bydst[dir].table;
  for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
// Move the policy's matching bundles onto the garbage list
   hlist_for_each_entry(pol, entry, table + i, bydst)
    prune_one_bundle(pol, func, &gc_list);
  }
 }
 read_unlock_bh(&xfrm_policy_lock);
// Free everything that was collected
 while (gc_list) {
  struct dst_entry *dst = gc_list;
  gc_list = dst->next;
  dst_free(dst);
 }
}
// Predicate: a bundle is unused when its reference count (__refcnt) reads 0.
// Returns non-zero for an unused bundle, 0 otherwise.
static int unused_bundle(struct dst_entry *dst)
{
 return !atomic_read(&dst->__refcnt);
}
// Prune the bundles of a single policy.
// Every bundle on pol->bundles for which func() returns non-zero is unlinked
// and pushed onto the front of the list at *gc_list_p; the others stay in
// place. The policy's lock is write-held for the whole walk.
static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
{
 struct dst_entry **link;

 write_lock(&pol->lock);
// link always points at the pointer that refers to the current entry
 link = &pol->bundles;
 for (;;) {
  struct dst_entry *cur = *link;

  if (cur == NULL)
   break;
  if (!func(cur)) {
// Keep this bundle: advance to its next pointer
   link = &cur->next;
   continue;
  }
// Unlink the bundle and prepend it to the garbage list
  *link = cur->next;
  cur->next = *gc_list_p;
  *gc_list_p = cur;
 }
 write_unlock(&pol->lock);
}

5.8 杂项

这些杂项并不是策略的直接处理函数, 而是xfrm的一些相关处理, 只是也放在xfrm_policy.c中了.

5.8.1 协议处理类型处理
xfrm_type用来定义各种协议处理类型, 如AH,ESP, IPCOMP, IPIP等
// Register a protocol handler type for an address family.
// Returns 0 on success, -EAFNOSUPPORT when no policy afinfo is available for
// the family, or -EEXIST when the slot for type->proto is already occupied.
int xfrm_register_type(struct xfrm_type *type, unsigned short family)
{
// Look up (and write-lock) the per-family policy info
 struct xfrm_policy_afinfo *afinfo = xfrm_policy_lock_afinfo(family);
 struct xfrm_type **slot;
 int err;

 if (unlikely(afinfo == NULL))
  return -EAFNOSUPPORT;

// Slot of this protocol in the family's type map
 slot = &afinfo->type_map[type->proto];
// Only an empty slot may be filled; an occupied one is an error
 if (likely(*slot == NULL)) {
  *slot = type;
  err = 0;
 } else {
  err = -EEXIST;
 }
 xfrm_policy_unlock_afinfo(afinfo);
 return err;
}
EXPORT_SYMBOL(xfrm_register_type);

// Unregister a protocol handler type for an address family.
// Returns 0 on success, -EAFNOSUPPORT when no policy afinfo is available for
// the family, or -ENOENT when the slot for type->proto does not hold this type.
int xfrm_unregister_type(struct xfrm_type *type, unsigned short family)
{
// Look up (and write-lock) the per-family policy info
 struct xfrm_policy_afinfo *afinfo = xfrm_policy_lock_afinfo(family);
 struct xfrm_type **slot;
 int err;

 if (unlikely(afinfo == NULL))
  return -EAFNOSUPPORT;

// Slot of this protocol in the family's type map
 slot = &afinfo->type_map[type->proto];
// Clear the slot only if it holds exactly the type being removed
 if (likely(*slot == type)) {
  *slot = NULL;
  err = 0;
 } else {
  err = -ENOENT;
 }
 xfrm_policy_unlock_afinfo(afinfo);
 return err;
}
EXPORT_SYMBOL(xfrm_unregister_type);
// Look up the handler type for a protocol number and address family.
// On success a reference on the type's owning module has been taken; release
// it with xfrm_put_type(). If nothing is found the first time, a module named
// "xfrm-type-<family>-<proto>" is requested and the lookup is retried once.
// Returns the type, or NULL if none is available.
struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
{
 struct xfrm_policy_afinfo *afinfo;
 struct xfrm_type **typemap;
 struct xfrm_type *type;
 int modload_attempted = 0;
retry:
// Get the per-family policy info (read-locked on success)
 afinfo = xfrm_policy_get_afinfo(family);
 if (unlikely(afinfo == NULL))
  return NULL;
// Type map of this family
 typemap = afinfo->type_map;
// Entry registered for the requested protocol
 type = typemap[proto];
// Take a reference on the owning module; if that fails, treat as not found
 if (unlikely(type && !try_module_get(type->owner)))
  type = NULL;
// Nothing usable and no load attempted yet: drop the afinfo, request the module, retry
 if (!type && !modload_attempted) {
  xfrm_policy_put_afinfo(afinfo);
  request_module("xfrm-type-%d-%d",
          (int) family, (int) proto);
  modload_attempted = 1;
  goto retry;
 }
 xfrm_policy_put_afinfo(afinfo);
 return type;
}
// Drop the module reference of a type (calls module_put on type->owner).
void xfrm_put_type(struct xfrm_type *type)
{
 module_put(type->owner);
}
 
5.8.2 协议模式处理

模式目前包括通道和传输两种.

// Register an encapsulation mode for an address family.
// Returns 0 on success, -EINVAL when mode->encap is out of range
// (>= XFRM_MODE_MAX), -EAFNOSUPPORT when no policy afinfo is available for the
// family, or -EEXIST when the slot for this encap is already occupied.
int xfrm_register_mode(struct xfrm_mode *mode, int family)
{
 struct xfrm_policy_afinfo *afinfo;
 struct xfrm_mode **slot;
 int err = -EEXIST;

 if (unlikely(mode->encap >= XFRM_MODE_MAX))
  return -EINVAL;

// Look up (and write-lock) the per-family policy info
 afinfo = xfrm_policy_lock_afinfo(family);
 if (unlikely(afinfo == NULL))
  return -EAFNOSUPPORT;

// Slot of this encapsulation in the family's mode map
 slot = &afinfo->mode_map[mode->encap];
// Only an empty slot may be filled
 if (likely(*slot == NULL)) {
  *slot = mode;
  err = 0;
 }
 xfrm_policy_unlock_afinfo(afinfo);
 return err;
}
EXPORT_SYMBOL(xfrm_register_mode);
// Unregister an encapsulation mode for an address family.
// Returns 0 on success, -EINVAL when mode->encap is out of range
// (>= XFRM_MODE_MAX), -EAFNOSUPPORT when no policy afinfo is available for the
// family, or -ENOENT when the slot does not hold this mode.
int xfrm_unregister_mode(struct xfrm_mode *mode, int family)
{
 struct xfrm_policy_afinfo *afinfo;
 struct xfrm_mode **slot;
 int err = -ENOENT;

 if (unlikely(mode->encap >= XFRM_MODE_MAX))
  return -EINVAL;

// Look up (and write-lock) the per-family policy info
 afinfo = xfrm_policy_lock_afinfo(family);
 if (unlikely(afinfo == NULL))
  return -EAFNOSUPPORT;

// Slot of this encapsulation in the family's mode map
 slot = &afinfo->mode_map[mode->encap];
// Clear the slot only if it holds exactly the mode being removed
 if (likely(*slot == mode)) {
  *slot = NULL;
  err = 0;
 }
 xfrm_policy_unlock_afinfo(afinfo);
 return err;
}
EXPORT_SYMBOL(xfrm_unregister_mode);

// Look up the encapsulation mode for an encap value and address family.
// On success a reference on the mode's owning module has been taken; release
// it with xfrm_put_mode(). If nothing is found the first time, a module named
// "xfrm-mode-<family>-<encap>" is requested and the lookup is retried once.
// Returns the mode, or NULL if encap is out of range or no mode is available.
struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
{
 struct xfrm_policy_afinfo *afinfo;
 struct xfrm_mode *mode;
 int modload_attempted = 0;
 if (unlikely(encap >= XFRM_MODE_MAX))
  return NULL;
retry:
// Get the per-family policy info (read-locked on success)
 afinfo = xfrm_policy_get_afinfo(family);
 if (unlikely(afinfo == NULL))
  return NULL;
// Entry registered for this encap in the family's mode map
 mode = afinfo->mode_map[encap];
// Take a reference on the owning module; if that fails, treat as not found
 if (unlikely(mode && !try_module_get(mode->owner)))
  mode = NULL;
// Nothing usable and no load attempted yet: drop the afinfo, request the module, retry
 if (!mode && !modload_attempted) {
  xfrm_policy_put_afinfo(afinfo);
  request_module("xfrm-mode-%d-%d", family, encap);
  modload_attempted = 1;
  goto retry;
 }
 xfrm_policy_put_afinfo(afinfo);
 return mode;
}

// Drop the module reference of a mode (calls module_put on mode->owner).
void xfrm_put_mode(struct xfrm_mode *mode)
{
 module_put(mode->owner);
}

5.8.3 协议信息处理
// Register the per-family policy info structure.
// Returns 0 on success, -EINVAL for a NULL afinfo, -EAFNOSUPPORT when
// afinfo->family is out of range (>= NPROTO), or -ENOBUFS when an entry is
// already registered for that family.
int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
{
 int err = 0;
 if (unlikely(afinfo == NULL))
  return -EINVAL;
 if (unlikely(afinfo->family >= NPROTO))
  return -EAFNOSUPPORT;
 write_lock_bh(&xfrm_policy_afinfo_lock);
// The array slot for this family must still be empty
 if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
  err = -ENOBUFS;
 else {
// dst operations structure supplied by the family
  struct dst_ops *dst_ops = afinfo->dst_ops;
// Fill in xfrm defaults for every field the family left NULL
  if (likely(dst_ops->kmem_cachep == NULL))
   dst_ops->kmem_cachep = xfrm_dst_cache;
  if (likely(dst_ops->check == NULL))
   dst_ops->check = xfrm_dst_check;
  if (likely(dst_ops->negative_advice == NULL))
   dst_ops->negative_advice = xfrm_negative_advice;
  if (likely(dst_ops->link_failure == NULL))
   dst_ops->link_failure = xfrm_link_failure;
  if (likely(afinfo->garbage_collect == NULL))
   afinfo->garbage_collect = __xfrm_garbage_collect;
// Publish the structure in the per-family array
  xfrm_policy_afinfo[afinfo->family] = afinfo;
 }
 write_unlock_bh(&xfrm_policy_afinfo_lock);
 return err;
}
EXPORT_SYMBOL(xfrm_policy_register_afinfo);

// Unregister the per-family policy info structure.
// Returns -EINVAL for a NULL afinfo or when a different structure is
// registered for the family, -EAFNOSUPPORT when afinfo->family is out of
// range (>= NPROTO), and 0 otherwise (including when nothing is registered).
int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
{
 int err = 0;
 if (unlikely(afinfo == NULL))
  return -EINVAL;
 if (unlikely(afinfo->family >= NPROTO))
  return -EAFNOSUPPORT;
 write_lock_bh(&xfrm_policy_afinfo_lock);
 if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
// The registered structure must be the one being removed
  if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
   err = -EINVAL;
  else {
// Clear the array slot and reset the dst_ops fields and garbage_collect hook to NULL
   struct dst_ops *dst_ops = afinfo->dst_ops;
   xfrm_policy_afinfo[afinfo->family] = NULL;
   dst_ops->kmem_cachep = NULL;
   dst_ops->check = NULL;
   dst_ops->negative_advice = NULL;
   dst_ops->link_failure = NULL;
   afinfo->garbage_collect = NULL;
  }
 }
 write_unlock_bh(&xfrm_policy_afinfo_lock);
 return err;
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);

// Look up the policy info structure of an address family under the read lock.
// On success the entry is returned with xfrm_policy_afinfo_lock still
// read-held; the caller releases it with xfrm_policy_put_afinfo(). Returns
// NULL, with the lock not held, when family is out of range (>= NPROTO) or no
// entry is registered.
static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
{
 struct xfrm_policy_afinfo *entry;

 if (unlikely(family >= NPROTO))
  return NULL;

 read_lock(&xfrm_policy_afinfo_lock);
 entry = xfrm_policy_afinfo[family];
// Found: return with the read lock still held
 if (likely(entry))
  return entry;
// Nothing registered: release the lock before reporting failure
 read_unlock(&xfrm_policy_afinfo_lock);
 return NULL;
}

// Release a policy info structure obtained with xfrm_policy_get_afinfo():
// drops the read lock on xfrm_policy_afinfo_lock. The argument is not used.
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
{
 read_unlock(&xfrm_policy_afinfo_lock);
}

// Look up the policy info structure of an address family under the write lock.
// On success the entry is returned with xfrm_policy_afinfo_lock still
// write-held; the caller releases it with xfrm_policy_unlock_afinfo(). Returns
// NULL, with the lock not held, when family is out of range (>= NPROTO) or no
// entry is registered.
static struct xfrm_policy_afinfo *xfrm_policy_lock_afinfo(unsigned int family)
{
 struct xfrm_policy_afinfo *entry;

 if (unlikely(family >= NPROTO))
  return NULL;

 write_lock_bh(&xfrm_policy_afinfo_lock);
 entry = xfrm_policy_afinfo[family];
// Found: return with the write lock still held
 if (likely(entry))
  return entry;
// Nothing registered: release the lock before reporting failure
 write_unlock_bh(&xfrm_policy_afinfo_lock);
 return NULL;
}
// Release a policy info structure obtained with xfrm_policy_lock_afinfo():
// drops the write lock on xfrm_policy_afinfo_lock. The argument is not used.
static void xfrm_policy_unlock_afinfo(struct xfrm_policy_afinfo *afinfo)
{
 write_unlock_bh(&xfrm_policy_afinfo_lock);
}
 
5.8.4 网卡回调

// Network-device notifier block. Positional initializer: the first member is
// the callback xfrm_dev_event; the other two are NULL and 0 (presumably the
// chain link and the priority; confirm against struct notifier_block).
static struct notifier_block xfrm_dev_notifier = {
 xfrm_dev_event,
 NULL,
 0
};
// Notifier callback for network-device events.
// Only NETDEV_DOWN is acted upon: it triggers xfrm_flush_bundles(), which
// prunes stale bundles. Every event, handled or not, yields NOTIFY_DONE.
static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
 if (event == NETDEV_DOWN)
  xfrm_flush_bundles();

 return NOTIFY_DONE;
}
// Flush stale bundles from all policies. Always returns 0.
static int xfrm_flush_bundles(void)
{
// Uses xfrm_prune_bundles() to do the removal,
// with stale_bundle() as the selection predicate
 xfrm_prune_bundles(stale_bundle);
 return 0;
}
// Predicate: is this bundle no longer usable?
// Returns 1 when unusable (xfrm_bundle_ok() returned 0), 0 when usable.
// The check runs with no policy, no flow, family AF_UNSPEC and strict == 0.
static int stale_bundle(struct dst_entry *dst)
{
 return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0);
}
// Check whether a bundle (chain of xfrm_dst entries starting at first) is
// still usable. Returns 0 when unusable, 1 when usable.
// pol, fl and family are used only for the flow-dependent checks, which are
// skipped when fl is NULL; strict additionally enables the address check for
// non-tunnel-mode states. As a side effect the cached child/route MTUs are
// refreshed and, if any changed, the MTU metrics along the chain are updated.
int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
  struct flowi *fl, int family, int strict)
{
 struct dst_entry *dst = &first->u.dst;
 struct xfrm_dst *last;
 u32 mtu;
// Validate the path route against its stored cookie
 if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
// and check that the device, if there is one, is running
     (dst->dev && !netif_running(dst->dev)))
  return 0;
 last = NULL;
 do {
// View the current entry as an xfrm_dst
  struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
// The state's selector must match the flow
  if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
   return 0;
  if (fl && !security_xfrm_flow_state_match(fl, dst->xfrm, pol))
   return 0;
// The state (SA) must be in the VALID state
  if (dst->xfrm->km.state != XFRM_STATE_VALID)
   return 0;
// The genid cached in the bundle entry must equal the state's genid
  if (xdst->genid != dst->xfrm->genid)
   return 0;
// Strict mode: for non-tunnel-mode states, check the state's addresses against the flow
  if (strict && fl && dst->xfrm->props.mode != XFRM_MODE_TUNNEL &&
      !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
   return 0;
// MTU of the child entry; refresh the cached value and remember this entry if it changed
  mtu = dst_mtu(dst->child);
  if (xdst->child_mtu_cached != mtu) {
   last = xdst;
   xdst->child_mtu_cached = mtu;
  }
// Validate the associated route against its stored cookie
  if (!dst_check(xdst->route, xdst->route_cookie))
   return 0;
// MTU of the associated route; refresh the cached value and remember this entry if it changed
  mtu = dst_mtu(xdst->route);
  if (xdst->route_mtu_cached != mtu) {
   last = xdst;
   xdst->route_mtu_cached = mtu;
  }
// Advance along the chain while entries still carry an xfrm state
  dst = dst->child;
 } while (dst->xfrm);
// last is the last entry visited whose cached MTU had changed; usually there is none
 if (likely(!last))
  return 1;
// Recompute the MTU metric from last back to first (via u.next)
 mtu = last->child_mtu_cached;
 for (;;) {
  dst = &last->u.dst;
// MTU for this state given the MTU below it, capped by the route MTU
  mtu = xfrm_state_mtu(dst->xfrm, mtu);
  if (mtu > last->route_mtu_cached)
   mtu = last->route_mtu_cached;
  dst->metrics[RTAX_MTU-1] = mtu;
  if (last == first)
   break;
// Step to the next entry and hand it the MTU just computed as its child MTU
  last = last->u.next;
  last->child_mtu_cached = mtu;
 }
 return 1;
}

5.9 小结
xfrm_policy相关函数的调用与被调用关系可简单表示如下:
ip_route_output_flow
  ->xfrm_lookup: find xfrm_dst for the skb, create dst_list
    -> xfrm_sk_policy_lookup
    -> flow_cache_lookup
    -> xfrm_find_bundle
    -> xfrm_policy_lookup_bytype
    -> xfrm_tmpl_resolve
      -> xfrm_tmpl_resolve_one
        -> xfrm_get_saddr
          -> afinfo->get_saddr == xfrm4_get_saddr
            -> xfrm4_dst_lookup
        -> xfrm_state_find
          -> __xfrm_state_lookup
          -> xfrm_state_alloc
          -> km_query
            -> km->acquire (pfkey_acquire, xfrm_send_acquire)
      -> xfrm_state_sort
        -> afinfo->state_sort == NULL
    -> km_wait_queue
    -> xfrm_bundle_create

do_ip_setsockopt
  -> xfrm_user_policy
    -> km->compile_policy
 -> xfrm_sk_policy_insert
 
pfkey_compile_policy
  -> xfrm_policy_alloc
    timer.func=xfrm_policy_timer

pfkey_spdadd
  -> xfrm_policy_alloc
  -> xfrm_policy_insert
    -> policy_hash_bysel
    -> selector_cmp
    -> xfrm_sel_ctx_match
   
pfkey_spddelete
  -> xfrm_policy_bysel_ctx
    -> policy_hash_bysel
    -> xfrm_sel_ctx_match
   
pfkey_spdget
  -> xfrm_policy_byid

xfrm_flush_policy
pfkey_policy_flush
  -> xfrm_policy_flush
    -> xfrm_policy_kill

xfrm_dump_policy
  -> xfrm_policy_walk
    -> dump_one_policy
pfkey_spddump
  -> xfrm_policy_walk
    -> dump_sp
gen_reqid
  -> xfrm_policy_walk
    -> check_reqid

xfrm_add_pol_expire
xfrm_policy_timer
  -> xfrm_policy_delete
    -> __xfrm_policy_unlink
    -> xfrm_policy_kill
 
xfrm_sk_policy_insert
  -> xfrm_get_index
  -> __xfrm_policy_link
  -> __xfrm_policy_unlink
  -> xfrm_policy_kill
 
 
xfrm_sk_clone_policy
  -> __xfrm_sk_clone_policy
    -> clone_policy
      -> xfrm_policy_alloc
      -> __xfrm_policy_link

xfrm_decode_session
  -> xfrm4_decode_session

xfrm4_route_forward
  -> xfrm_route_forward
    -> __xfrm_route_forward
      -> xfrm4_decode_session
      -> xfrm_lookup

xfrm4_policy_check
  -> xfrm_policy_check
    -> __xfrm_policy_check
      -> xfrm4_decode_session
      -> __xfrm_sk_policy_lookup
        -> xfrm_selector_match
      -> __flow_cache_lookup
        -> xfrm_policy_lookup
        -> xfrm_policy_lookup_bytype
          -> policy_hash_direct
          -> xfrm_policy_match
            -> xfrm_selector_match
      -> xfrm_policy_lookup_bytype
      -> xfrm_tmpl_sort
      -> xfrm_policy_ok
        -> xfrm_state_ok
       
xfrm_flush_bundles
  -> xfrm_prune_bundles
    -> prune_one_bundle
      -> stale_bundle 
   
 

发表于： 2007-06-09，修改于： 2007-06-09 08:37，已浏览3527次，有评论4条 推荐 投诉
	网友： Zetalog 	时间：2007-11-20 17:54:12 IP地址：218.81.225.★
	

关于xfrm_policy_lookup_bytype我来注释两句。

可以说xfrm_policy_bydst中存放的是目标地址和源地址是单一地址的策略（换言之，就是掩码prefixlen_x都是32），而xfrm_policy_inexact里面存放的是掩码非32的策略。

看函数xfrm_policy_lookup_bytype实现能发现一个问题，就是单一地址策略即使优先级低于掩码型策略，单一地址策略表也要被先遍历一遍。

不知道为什么要将bydst分成两个哈希表。暂时还没看到。


	网友： Zetalog 	时间：2007-11-20 17:54:12 IP地址：218.81.225.★
	

关于xfrm_policy_lookup_bytype我来注释两句。

可以说xfrm_policy_bydst中存放的是目标地址和源地址是单一地址的策略（换言之，就是掩码prefixlen_x都是32），而xfrm_policy_inexact里面存放的是掩码非32的策略。

看函数xfrm_policy_lookup_bytype实现能发现一个问题，就是单一地址策略即使优先级低于掩码型策略，单一地址策略表也要被先遍历一遍。

不知道为什么要将bydst分成两个哈希表。暂时还没看到。


	网友： Evan 	时间：2008-03-26 15:03:24 IP地址：58.34.236.★
	

请问： xfrm_policy_insert的最后要清理新插入的policy后面的dst_entry， 这样做的目的是什么呢， 如果不清理有什么影响呢？谢谢。


	网友： yfydz 	时间：2008-04-01 09:07:00 IP地址：218.247.216.★
	

没啥关系,gc工作队列应该也会处理,只是这个是新的策略,顺手把老的删除了也没啥
相关推荐