From e0d1caa7b0d5f02e4f34aa09c695d04251310c6c Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Mon, 24 Jul 2006 23:29:07 -0700 Subject: [MLSXFRM]: Flow based matching of xfrm policy and state This implements a seamless mechanism for xfrm policy selection and state matching based on the flow sid. This also includes the necessary SELinux enforcement pieces. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 0021aad..be02bd9 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -367,7 +367,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, */ if (x->km.state == XFRM_STATE_VALID) { if (!xfrm_selector_match(&x->sel, fl, family) || - !xfrm_sec_ctx_match(pol->security, x->security)) + !security_xfrm_state_pol_flow_match(x, pol, fl)) continue; if (!best || best->km.dying > x->km.dying || @@ -379,7 +379,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, } else if (x->km.state == XFRM_STATE_ERROR || x->km.state == XFRM_STATE_EXPIRED) { if (xfrm_selector_match(&x->sel, fl, family) && - xfrm_sec_ctx_match(pol->security, x->security)) + security_xfrm_state_pol_flow_match(x, pol, fl)) error = -ESRCH; } } @@ -403,6 +403,14 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, * to current session. 
*/ xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family); + error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid); + if (error) { + x->km.state = XFRM_STATE_DEAD; + xfrm_state_put(x); + x = NULL; + goto out; + } + if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; list_add_tail(&x->bydst, xfrm_state_bydst+h); -- cgit v1.1 From cb969f072b6d67770b559617f14e767f47e77ece Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Mon, 24 Jul 2006 23:32:20 -0700 Subject: [MLSXFRM]: Default labeling of socket specific IPSec policies This defaults the label of socket-specific IPSec policies to be the same as the socket they are set on. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index be02bd9..1c79608 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1026,7 +1026,7 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen err = -EINVAL; read_lock(&xfrm_km_lock); list_for_each_entry(km, &xfrm_km_list, list) { - pol = km->compile_policy(sk->sk_family, optname, data, + pol = km->compile_policy(sk, optname, data, optlen, &err); if (err >= 0) break; -- cgit v1.1 From 5794708f11551b6d19b10673abf4b0202f66b44d Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Fri, 22 Sep 2006 15:06:24 -0700 Subject: [XFRM]: Introduce a helper to compare id protocol. Put the helper to header for future use. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_state.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 1c79608..34c038c 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -294,7 +294,7 @@ void xfrm_state_flush(u8 proto) restart: list_for_each_entry(x, xfrm_state_bydst+i, bydst) { if (!xfrm_state_kern(x) && - (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) { + xfrm_id_proto_match(x->id.proto, proto)) { xfrm_state_hold(x); spin_unlock_bh(&xfrm_state_lock); @@ -772,7 +772,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), spin_lock_bh(&xfrm_state_lock); for (i = 0; i < XFRM_DST_HSIZE; i++) { list_for_each_entry(x, xfrm_state_bydst+i, bydst) { - if (proto == IPSEC_PROTO_ANY || x->id.proto == proto) + if (xfrm_id_proto_match(x->id.proto, proto)) count++; } } @@ -783,7 +783,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), for (i = 0; i < XFRM_DST_HSIZE; i++) { list_for_each_entry(x, xfrm_state_bydst+i, bydst) { - if (proto != IPSEC_PROTO_ANY && x->id.proto != proto) + if (!xfrm_id_proto_match(x->id.proto, proto)) continue; err = func(x, --count, data); if (err) -- cgit v1.1 From 6c44e6b7ab500d7e3e3f406c83325671be51a752 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 17:53:57 -0700 Subject: [XFRM] STATE: Add source address list. Support source address based searching. Mobile IPv6 will use it. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_state.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 34c038c..2a99928 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -45,6 +45,7 @@ static DEFINE_SPINLOCK(xfrm_state_lock); * Also, it can be used by ah/esp icmp error handler to find offending SA. */ static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE]; +static struct list_head xfrm_state_bysrc[XFRM_DST_HSIZE]; static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE]; DECLARE_WAIT_QUEUE_HEAD(km_waitq); @@ -200,6 +201,7 @@ struct xfrm_state *xfrm_state_alloc(void) atomic_set(&x->refcnt, 1); atomic_set(&x->tunnel_users, 0); INIT_LIST_HEAD(&x->bydst); + INIT_LIST_HEAD(&x->bysrc); INIT_LIST_HEAD(&x->byspi); init_timer(&x->timer); x->timer.function = xfrm_timer_handler; @@ -240,6 +242,8 @@ int __xfrm_state_delete(struct xfrm_state *x) spin_lock(&xfrm_state_lock); list_del(&x->bydst); __xfrm_state_put(x); + list_del(&x->bysrc); + __xfrm_state_put(x); if (x->id.spi) { list_del(&x->byspi); __xfrm_state_put(x); @@ -415,6 +419,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, x->km.state = XFRM_STATE_ACQ; list_add_tail(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); + list_add_tail(&x->bysrc, xfrm_state_bysrc+h); + xfrm_state_hold(x); if (x->id.spi) { h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); list_add(&x->byspi, xfrm_state_byspi+h); @@ -448,11 +454,19 @@ static void __xfrm_state_insert(struct xfrm_state *x) list_add(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); - h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); + h = xfrm_src_hash(&x->props.saddr, x->props.family); - list_add(&x->byspi, xfrm_state_byspi+h); + list_add(&x->bysrc, xfrm_state_bysrc+h); xfrm_state_hold(x); + if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) { + h = xfrm_spi_hash(&x->id.daddr, x->id.spi, 
x->id.proto, + x->props.family); + + list_add(&x->byspi, xfrm_state_byspi+h); + xfrm_state_hold(x); + } + if (!mod_timer(&x->timer, jiffies + HZ)) xfrm_state_hold(x); @@ -1075,6 +1089,7 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo) err = -ENOBUFS; else { afinfo->state_bydst = xfrm_state_bydst; + afinfo->state_bysrc = xfrm_state_bysrc; afinfo->state_byspi = xfrm_state_byspi; xfrm_state_afinfo[afinfo->family] = afinfo; } @@ -1097,6 +1112,7 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) else { xfrm_state_afinfo[afinfo->family] = NULL; afinfo->state_byspi = NULL; + afinfo->state_bysrc = NULL; afinfo->state_bydst = NULL; } } @@ -1218,6 +1234,7 @@ void __init xfrm_state_init(void) for (i=0; i Date: Wed, 23 Aug 2006 17:56:04 -0700 Subject: [XFRM] STATE: Search by address using source address list. This is a support to search transformation states by its addresses by using source address list for Mobile IPv6 usage. To use it from user-space, it is also added a message type for source address as a xfrm state option. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_state.c | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 2a99928..11f480b 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -487,6 +487,16 @@ void xfrm_state_insert(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_insert); +static inline struct xfrm_state * +__xfrm_state_locate(struct xfrm_state_afinfo *afinfo, struct xfrm_state *x, + int use_spi) +{ + if (use_spi) + return afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); + else + return afinfo->state_lookup_byaddr(&x->id.daddr, &x->props.saddr, x->id.proto); +} + static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq); int xfrm_state_add(struct xfrm_state *x) @@ -495,6 +505,7 @@ int xfrm_state_add(struct xfrm_state *x) struct xfrm_state *x1; int family; int err; + int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); family = x->props.family; afinfo = xfrm_state_get_afinfo(family); @@ -503,7 +514,7 @@ int xfrm_state_add(struct xfrm_state *x) spin_lock_bh(&xfrm_state_lock); - x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); + x1 = __xfrm_state_locate(afinfo, x, use_spi); if (x1) { xfrm_state_put(x1); x1 = NULL; @@ -511,7 +522,7 @@ int xfrm_state_add(struct xfrm_state *x) goto out; } - if (x->km.seq) { + if (use_spi && x->km.seq) { x1 = __xfrm_find_acq_byseq(x->km.seq); if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) { xfrm_state_put(x1); @@ -519,7 +530,7 @@ int xfrm_state_add(struct xfrm_state *x) } } - if (!x1) + if (use_spi && !x1) x1 = afinfo->find_acq( x->props.mode, x->props.reqid, x->id.proto, &x->id.daddr, &x->props.saddr, 0); @@ -548,13 +559,14 @@ int xfrm_state_update(struct xfrm_state *x) struct xfrm_state_afinfo *afinfo; struct xfrm_state *x1; int err; + int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); afinfo = xfrm_state_get_afinfo(x->props.family); if 
(unlikely(afinfo == NULL)) return -EAFNOSUPPORT; spin_lock_bh(&xfrm_state_lock); - x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); + x1 = __xfrm_state_locate(afinfo, x, use_spi); err = -ESRCH; if (!x1) @@ -675,6 +687,23 @@ xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, EXPORT_SYMBOL(xfrm_state_lookup); struct xfrm_state * +xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, + u8 proto, unsigned short family) +{ + struct xfrm_state *x; + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + if (!afinfo) + return NULL; + + spin_lock_bh(&xfrm_state_lock); + x = afinfo->state_lookup_byaddr(daddr, saddr, proto); + spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); + return x; +} +EXPORT_SYMBOL(xfrm_state_lookup_byaddr); + +struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create, unsigned short family) -- cgit v1.1 From fbd9a5b47ee9c319ff0cae584391241ce78ffd6b Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 18:08:21 -0700 Subject: [XFRM] STATE: Common receive function for route optimization extension headers. XFRM_STATE_WILDRECV flag is introduced; the last resort state is set it and receives packet which is not route optimized but uses such extension headers i.e. Mobile IPv6 signaling (binding update and acknowledgement). A node enabled Mobile IPv6 adds the state. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_state.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 11f480b..f053715 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -352,6 +352,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, list_for_each_entry(x, xfrm_state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == tmpl->reqid && + !(x->props.flags & XFRM_STATE_WILDRECV) && xfrm_state_addr_check(x, daddr, saddr, family) && tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && -- cgit v1.1 From 060f02a3bdd4d9ba8aa3c48e9b470672b1f3a585 Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Wed, 23 Aug 2006 18:18:55 -0700 Subject: [XFRM] STATE: Introduce care-of address. Care-of address is carried by state as a transformation option like IPsec encryption/authentication algorithm. Based on MIPL2 kernel patch. Signed-off-by: Noriaki TAKAMIYA Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki --- net/xfrm/xfrm_state.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index f053715..3da89c0 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -78,6 +78,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) kfree(x->ealg); kfree(x->calg); kfree(x->encap); + kfree(x->coaddr); if (x->mode) xfrm_put_mode(x->mode); if (x->type) { @@ -603,6 +604,11 @@ out: if (likely(x1->km.state == XFRM_STATE_VALID)) { if (x->encap && x1->encap) memcpy(x1->encap, x->encap, sizeof(*x1->encap)); + if (x->coaddr && x1->coaddr) { + memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr)); + } + if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel))) + memcpy(&x1->sel, &x->sel, sizeof(x1->sel)); memcpy(&x1->lft, &x->lft, sizeof(x1->lft)); x1->km.dying = 0; -- cgit v1.1 From 97a64b4577ae2bc5599dbd008a3cd9e25de9b9f5 Mon Sep 17 00:00:00 2001 From: Masahide 
NAKAMURA Date: Wed, 23 Aug 2006 20:44:06 -0700 Subject: [XFRM]: Introduce XFRM_MSG_REPORT. XFRM_MSG_REPORT is a message that notifies user-space of a state protocol and selector from the kernel. Mobile IPv6 will use it when an inbound packet is rejected during route optimization, so that user-space knows a binding error is required. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 3da89c0..a26ef69 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1055,6 +1055,25 @@ void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid) } EXPORT_SYMBOL(km_policy_expired); +int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) +{ + int err = -EINVAL; + int ret; + struct xfrm_mgr *km; + + read_lock(&xfrm_km_lock); + list_for_each_entry(km, &xfrm_km_list, list) { + if (km->report) { + ret = km->report(proto, sel, addr); + if (!ret) + err = ret; + } + } + read_unlock(&xfrm_km_lock); + return err; +} +EXPORT_SYMBOL(km_report); + int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen) { int err; -- cgit v1.1 From 41a49cc3c02ace59d4dddae91ea211c330970ee3 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 22:48:31 -0700 Subject: [XFRM]: Add sorting interface for state and template. When two transformation policies apply, they must be merged. This provides the infrastructure to sort states for outbound and templates for inbound, respectively. It will be used when Mobile IPv6 and IPsec are used at the same time. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_state.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index a26ef69..622e92a 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -728,6 +728,44 @@ xfrm_find_acq(u8 mode, u32 reqid, u8 proto, } EXPORT_SYMBOL(xfrm_find_acq); +#ifdef CONFIG_XFRM_SUB_POLICY +int +xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, + unsigned short family) +{ + int err = 0; + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + if (!afinfo) + return -EAFNOSUPPORT; + + spin_lock_bh(&xfrm_state_lock); + if (afinfo->tmpl_sort) + err = afinfo->tmpl_sort(dst, src, n); + spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_tmpl_sort); + +int +xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, + unsigned short family) +{ + int err = 0; + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + if (!afinfo) + return -EAFNOSUPPORT; + + spin_lock_bh(&xfrm_state_lock); + if (afinfo->state_sort) + err = afinfo->state_sort(dst, src, n); + spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_state_sort); +#endif + /* Silly enough, but I'm lazy to build resolution list */ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) -- cgit v1.1 From 2770834c9f44afd1bfa13914c7285470775af657 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 00:13:10 -0700 Subject: [XFRM]: Pull xfrm_state_bydst hash table knowledge out of afinfo. Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_state.c | 110 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 100 insertions(+), 10 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 622e92a..80f5f9d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -48,6 +48,18 @@ static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE]; static struct list_head xfrm_state_bysrc[XFRM_DST_HSIZE]; static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE]; +static __inline__ +unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) +{ + switch (family) { + case AF_INET: + return __xfrm4_dst_hash(addr); + case AF_INET6: + return __xfrm6_dst_hash(addr); + } + return 0; +} + DECLARE_WAIT_QUEUE_HEAD(km_waitq); EXPORT_SYMBOL(km_waitq); @@ -489,6 +501,89 @@ void xfrm_state_insert(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_insert); +/* xfrm_state_lock is held */ +static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) +{ + unsigned int h = xfrm_dst_hash(daddr, family); + struct xfrm_state *x; + + list_for_each_entry(x, xfrm_state_bydst+h, bydst) { + if (x->props.reqid != reqid || + x->props.mode != mode || + x->props.family != family || + x->km.state != XFRM_STATE_ACQ || + x->id.spi != 0) + continue; + + switch (family) { + case AF_INET: + if (x->id.daddr.a4 != daddr->a4 || + x->props.saddr.a4 != saddr->a4) + continue; + break; + case AF_INET6: + if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6, + (struct in6_addr *)daddr) || + !ipv6_addr_equal((struct in6_addr *) + x->props.saddr.a6, + (struct in6_addr *)saddr)) + continue; + break; + }; + + xfrm_state_hold(x); + return x; + } + + if (!create) + return NULL; + + x = xfrm_state_alloc(); + if (likely(x)) { + switch (family) { + case AF_INET: + x->sel.daddr.a4 = daddr->a4; + x->sel.saddr.a4 = saddr->a4; + x->sel.prefixlen_d = 32; + x->sel.prefixlen_s = 
32; + x->props.saddr.a4 = saddr->a4; + x->id.daddr.a4 = daddr->a4; + break; + + case AF_INET6: + ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6, + (struct in6_addr *)daddr); + ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6, + (struct in6_addr *)saddr); + x->sel.prefixlen_d = 128; + x->sel.prefixlen_s = 128; + ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6, + (struct in6_addr *)saddr); + ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6, + (struct in6_addr *)daddr); + break; + }; + + x->km.state = XFRM_STATE_ACQ; + x->id.proto = proto; + x->props.family = family; + x->props.mode = mode; + x->props.reqid = reqid; + x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; + xfrm_state_hold(x); + x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; + add_timer(&x->timer); + xfrm_state_hold(x); + list_add_tail(&x->bydst, xfrm_state_bydst+h); + h = xfrm_src_hash(saddr, family); + xfrm_state_hold(x); + list_add_tail(&x->bysrc, xfrm_state_bysrc+h); + wake_up(&km_waitq); + } + + return x; +} + static inline struct xfrm_state * __xfrm_state_locate(struct xfrm_state_afinfo *afinfo, struct xfrm_state *x, int use_spi) @@ -533,9 +628,9 @@ int xfrm_state_add(struct xfrm_state *x) } if (use_spi && !x1) - x1 = afinfo->find_acq( - x->props.mode, x->props.reqid, x->id.proto, - &x->id.daddr, &x->props.saddr, 0); + x1 = __find_acq_core(family, x->props.mode, x->props.reqid, + x->id.proto, + &x->id.daddr, &x->props.saddr, 0); __xfrm_state_insert(x); err = 0; @@ -716,14 +811,11 @@ xfrm_find_acq(u8 mode, u32 reqid, u8 proto, int create, unsigned short family) { struct xfrm_state *x; - struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); - if (!afinfo) - return NULL; spin_lock_bh(&xfrm_state_lock); - x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create); + x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create); spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); + return x; } EXPORT_SYMBOL(xfrm_find_acq); @@ -1181,7 +1273,6 @@ int 
xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo) if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL)) err = -ENOBUFS; else { - afinfo->state_bydst = xfrm_state_bydst; afinfo->state_bysrc = xfrm_state_bysrc; afinfo->state_byspi = xfrm_state_byspi; xfrm_state_afinfo[afinfo->family] = afinfo; @@ -1206,7 +1297,6 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) xfrm_state_afinfo[afinfo->family] = NULL; afinfo->state_byspi = NULL; afinfo->state_bysrc = NULL; - afinfo->state_bydst = NULL; } } write_unlock_bh(&xfrm_state_afinfo_lock); -- cgit v1.1 From edcd582152090bfb0ccb4ad444c151798a73eda8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 00:42:45 -0700 Subject: [XFRM]: Pull xfrm_state_by{spi,src} hash table knowledge out of afinfo. Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 210 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 159 insertions(+), 51 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 80f5f9d..4a3832f 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -38,6 +38,8 @@ EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth); static DEFINE_SPINLOCK(xfrm_state_lock); +#define XFRM_DST_HSIZE 1024 + /* Hash table to find appropriate SA towards given target (endpoint * of tunnel or destination of transport mode) allowed by selector. 
* @@ -49,6 +51,48 @@ static struct list_head xfrm_state_bysrc[XFRM_DST_HSIZE]; static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE]; static __inline__ +unsigned __xfrm4_dst_hash(xfrm_address_t *addr) +{ + unsigned h; + h = ntohl(addr->a4); + h = (h ^ (h>>16)) % XFRM_DST_HSIZE; + return h; +} + +static __inline__ +unsigned __xfrm6_dst_hash(xfrm_address_t *addr) +{ + unsigned h; + h = ntohl(addr->a6[2]^addr->a6[3]); + h = (h ^ (h>>16)) % XFRM_DST_HSIZE; + return h; +} + +static __inline__ +unsigned __xfrm4_src_hash(xfrm_address_t *addr) +{ + return __xfrm4_dst_hash(addr); +} + +static __inline__ +unsigned __xfrm6_src_hash(xfrm_address_t *addr) +{ + return __xfrm6_dst_hash(addr); +} + +static __inline__ +unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) +{ + switch (family) { + case AF_INET: + return __xfrm4_src_hash(addr); + case AF_INET6: + return __xfrm6_src_hash(addr); + } + return 0; +} + +static __inline__ unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) { switch (family) { @@ -60,6 +104,36 @@ unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) return 0; } +static __inline__ +unsigned __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) +{ + unsigned h; + h = ntohl(addr->a4^spi^proto); + h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; + return h; +} + +static __inline__ +unsigned __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) +{ + unsigned h; + h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto); + h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; + return h; +} + +static __inline__ +unsigned xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family) +{ + switch (family) { + case AF_INET: + return __xfrm4_spi_hash(addr, spi, proto); + case AF_INET6: + return __xfrm6_spi_hash(addr, spi, proto); + } + return 0; /*XXX*/ +} + DECLARE_WAIT_QUEUE_HEAD(km_waitq); EXPORT_SYMBOL(km_waitq); @@ -342,6 +416,83 @@ xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, return 0; } +static struct 
xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family) +{ + unsigned int h = xfrm_spi_hash(daddr, spi, proto, family); + struct xfrm_state *x; + + list_for_each_entry(x, xfrm_state_byspi+h, byspi) { + if (x->props.family != family || + x->id.spi != spi || + x->id.proto != proto) + continue; + + switch (family) { + case AF_INET: + if (x->id.daddr.a4 != daddr->a4) + continue; + break; + case AF_INET6: + if (!ipv6_addr_equal((struct in6_addr *)daddr, + (struct in6_addr *) + x->id.daddr.a6)) + continue; + break; + }; + + xfrm_state_hold(x); + return x; + } + + return NULL; +} + +static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) +{ + unsigned int h = xfrm_src_hash(saddr, family); + struct xfrm_state *x; + + list_for_each_entry(x, xfrm_state_bysrc+h, bysrc) { + if (x->props.family != family || + x->id.proto != proto) + continue; + + switch (family) { + case AF_INET: + if (x->id.daddr.a4 != daddr->a4 || + x->props.saddr.a4 != saddr->a4) + continue; + break; + case AF_INET6: + if (!ipv6_addr_equal((struct in6_addr *)daddr, + (struct in6_addr *) + x->id.daddr.a6) || + !ipv6_addr_equal((struct in6_addr *)saddr, + (struct in6_addr *) + x->props.saddr.a6)) + continue; + break; + }; + + xfrm_state_hold(x); + return x; + } + + return NULL; +} + +static inline struct xfrm_state * +__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family) +{ + if (use_spi) + return __xfrm_state_lookup(&x->id.daddr, x->id.spi, + x->id.proto, family); + else + return __xfrm_state_lookup_byaddr(&x->id.daddr, + &x->props.saddr, + x->id.proto, family); +} + struct xfrm_state * xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct flowi *fl, struct xfrm_tmpl *tmpl, @@ -353,14 +504,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, int acquire_in_progress = 0; int error = 0; struct xfrm_state *best = NULL; - struct xfrm_state_afinfo *afinfo; - 
afinfo = xfrm_state_get_afinfo(family); - if (afinfo == NULL) { - *err = -EAFNOSUPPORT; - return NULL; - } - spin_lock_bh(&xfrm_state_lock); list_for_each_entry(x, xfrm_state_bydst+h, bydst) { if (x->props.family == family && @@ -406,8 +550,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, x = best; if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && - (x0 = afinfo->state_lookup(daddr, tmpl->id.spi, - tmpl->id.proto)) != NULL) { + (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi, + tmpl->id.proto, family)) != NULL) { xfrm_state_put(x0); error = -EEXIST; goto out; @@ -457,7 +601,6 @@ out: else *err = acquire_in_progress ? -EAGAIN : error; spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); return x; } @@ -584,34 +727,20 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re return x; } -static inline struct xfrm_state * -__xfrm_state_locate(struct xfrm_state_afinfo *afinfo, struct xfrm_state *x, - int use_spi) -{ - if (use_spi) - return afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); - else - return afinfo->state_lookup_byaddr(&x->id.daddr, &x->props.saddr, x->id.proto); -} - static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq); int xfrm_state_add(struct xfrm_state *x) { - struct xfrm_state_afinfo *afinfo; struct xfrm_state *x1; int family; int err; int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); family = x->props.family; - afinfo = xfrm_state_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; spin_lock_bh(&xfrm_state_lock); - x1 = __xfrm_state_locate(afinfo, x, use_spi); + x1 = __xfrm_state_locate(x, use_spi, family); if (x1) { xfrm_state_put(x1); x1 = NULL; @@ -637,7 +766,6 @@ int xfrm_state_add(struct xfrm_state *x) out: spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); if (!err) xfrm_flush_all_bundles(); @@ -653,17 +781,12 @@ EXPORT_SYMBOL(xfrm_state_add); int xfrm_state_update(struct xfrm_state *x) { - struct 
xfrm_state_afinfo *afinfo; struct xfrm_state *x1; int err; int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); - afinfo = xfrm_state_get_afinfo(x->props.family); - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - spin_lock_bh(&xfrm_state_lock); - x1 = __xfrm_state_locate(afinfo, x, use_spi); + x1 = __xfrm_state_locate(x, use_spi, x->props.family); err = -ESRCH; if (!x1) @@ -683,7 +806,6 @@ int xfrm_state_update(struct xfrm_state *x) out: spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); if (err) return err; @@ -776,14 +898,10 @@ xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family) { struct xfrm_state *x; - struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); - if (!afinfo) - return NULL; spin_lock_bh(&xfrm_state_lock); - x = afinfo->state_lookup(daddr, spi, proto); + x = __xfrm_state_lookup(daddr, spi, proto, family); spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); return x; } EXPORT_SYMBOL(xfrm_state_lookup); @@ -793,14 +911,10 @@ xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) { struct xfrm_state *x; - struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); - if (!afinfo) - return NULL; spin_lock_bh(&xfrm_state_lock); - x = afinfo->state_lookup_byaddr(daddr, saddr, proto); + x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family); spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); return x; } EXPORT_SYMBOL(xfrm_state_lookup_byaddr); @@ -1272,11 +1386,8 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo) write_lock_bh(&xfrm_state_afinfo_lock); if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL)) err = -ENOBUFS; - else { - afinfo->state_bysrc = xfrm_state_bysrc; - afinfo->state_byspi = xfrm_state_byspi; + else xfrm_state_afinfo[afinfo->family] = afinfo; - } write_unlock_bh(&xfrm_state_afinfo_lock); return err; } @@ -1293,11 +1404,8 @@ int 
xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) { if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo)) err = -EINVAL; - else { + else xfrm_state_afinfo[afinfo->family] = NULL; - afinfo->state_byspi = NULL; - afinfo->state_bysrc = NULL; - } } write_unlock_bh(&xfrm_state_afinfo_lock); return err; -- cgit v1.1 From 8f126e37c0b250310a48a609bedf92a19a5559ec Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 02:45:07 -0700 Subject: [XFRM]: Convert xfrm_state hash linkage to hlists. Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 92 ++++++++++++++++++++++++++++----------------------- 1 file changed, 51 insertions(+), 41 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 4a3832f..fe3c8c3 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -46,9 +46,9 @@ static DEFINE_SPINLOCK(xfrm_state_lock); * Main use is finding SA after policy selected tunnel or transport mode. * Also, it can be used by ah/esp icmp error handler to find offending SA. 
*/ -static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE]; -static struct list_head xfrm_state_bysrc[XFRM_DST_HSIZE]; -static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE]; +static struct hlist_head xfrm_state_bydst[XFRM_DST_HSIZE]; +static struct hlist_head xfrm_state_bysrc[XFRM_DST_HSIZE]; +static struct hlist_head xfrm_state_byspi[XFRM_DST_HSIZE]; static __inline__ unsigned __xfrm4_dst_hash(xfrm_address_t *addr) @@ -141,7 +141,7 @@ static DEFINE_RWLOCK(xfrm_state_afinfo_lock); static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; static struct work_struct xfrm_state_gc_work; -static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list); +static HLIST_HEAD(xfrm_state_gc_list); static DEFINE_SPINLOCK(xfrm_state_gc_lock); static int xfrm_state_gc_flush_bundles; @@ -178,8 +178,8 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) static void xfrm_state_gc_task(void *data) { struct xfrm_state *x; - struct list_head *entry, *tmp; - struct list_head gc_list = LIST_HEAD_INIT(gc_list); + struct hlist_node *entry, *tmp; + struct hlist_head gc_list; if (xfrm_state_gc_flush_bundles) { xfrm_state_gc_flush_bundles = 0; @@ -187,13 +187,13 @@ static void xfrm_state_gc_task(void *data) } spin_lock_bh(&xfrm_state_gc_lock); - list_splice_init(&xfrm_state_gc_list, &gc_list); + gc_list.first = xfrm_state_gc_list.first; + INIT_HLIST_HEAD(&xfrm_state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - list_for_each_safe(entry, tmp, &gc_list) { - x = list_entry(entry, struct xfrm_state, bydst); + hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst) xfrm_state_gc_destroy(x); - } + wake_up(&km_waitq); } @@ -287,9 +287,9 @@ struct xfrm_state *xfrm_state_alloc(void) if (x) { atomic_set(&x->refcnt, 1); atomic_set(&x->tunnel_users, 0); - INIT_LIST_HEAD(&x->bydst); - INIT_LIST_HEAD(&x->bysrc); - INIT_LIST_HEAD(&x->byspi); + INIT_HLIST_NODE(&x->bydst); + INIT_HLIST_NODE(&x->bysrc); + INIT_HLIST_NODE(&x->byspi); init_timer(&x->timer); x->timer.function = 
xfrm_timer_handler; x->timer.data = (unsigned long)x; @@ -314,7 +314,7 @@ void __xfrm_state_destroy(struct xfrm_state *x) BUG_TRAP(x->km.state == XFRM_STATE_DEAD); spin_lock_bh(&xfrm_state_gc_lock); - list_add(&x->bydst, &xfrm_state_gc_list); + hlist_add_head(&x->bydst, &xfrm_state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); schedule_work(&xfrm_state_gc_work); } @@ -327,12 +327,12 @@ int __xfrm_state_delete(struct xfrm_state *x) if (x->km.state != XFRM_STATE_DEAD) { x->km.state = XFRM_STATE_DEAD; spin_lock(&xfrm_state_lock); - list_del(&x->bydst); + hlist_del(&x->bydst); __xfrm_state_put(x); - list_del(&x->bysrc); + hlist_del(&x->bysrc); __xfrm_state_put(x); if (x->id.spi) { - list_del(&x->byspi); + hlist_del(&x->byspi); __xfrm_state_put(x); } spin_unlock(&xfrm_state_lock); @@ -378,12 +378,13 @@ EXPORT_SYMBOL(xfrm_state_delete); void xfrm_state_flush(u8 proto) { int i; - struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); for (i = 0; i < XFRM_DST_HSIZE; i++) { + struct hlist_node *entry; + struct xfrm_state *x; restart: - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { + hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { if (!xfrm_state_kern(x) && xfrm_id_proto_match(x->id.proto, proto)) { xfrm_state_hold(x); @@ -420,8 +421,9 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 { unsigned int h = xfrm_spi_hash(daddr, spi, proto, family); struct xfrm_state *x; + struct hlist_node *entry; - list_for_each_entry(x, xfrm_state_byspi+h, byspi) { + hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) { if (x->props.family != family || x->id.spi != spi || x->id.proto != proto) @@ -451,8 +453,9 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm { unsigned int h = xfrm_src_hash(saddr, family); struct xfrm_state *x; + struct hlist_node *entry; - list_for_each_entry(x, xfrm_state_bysrc+h, bysrc) { + hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) { if (x->props.family != family || 
x->id.proto != proto) continue; @@ -499,14 +502,15 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct xfrm_policy *pol, int *err, unsigned short family) { - unsigned h = xfrm_dst_hash(daddr, family); + unsigned int h = xfrm_dst_hash(daddr, family); + struct hlist_node *entry; struct xfrm_state *x, *x0; int acquire_in_progress = 0; int error = 0; struct xfrm_state *best = NULL; spin_lock_bh(&xfrm_state_lock); - list_for_each_entry(x, xfrm_state_bydst+h, bydst) { + hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == tmpl->reqid && !(x->props.flags & XFRM_STATE_WILDRECV) && @@ -575,13 +579,14 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; - list_add_tail(&x->bydst, xfrm_state_bydst+h); + hlist_add_head(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); - list_add_tail(&x->bysrc, xfrm_state_bysrc+h); + h = xfrm_src_hash(saddr, family); + hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); xfrm_state_hold(x); if (x->id.spi) { h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); - list_add(&x->byspi, xfrm_state_byspi+h); + hlist_add_head(&x->byspi, xfrm_state_byspi+h); xfrm_state_hold(x); } x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; @@ -608,19 +613,19 @@ static void __xfrm_state_insert(struct xfrm_state *x) { unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family); - list_add(&x->bydst, xfrm_state_bydst+h); + hlist_add_head(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); h = xfrm_src_hash(&x->props.saddr, x->props.family); - list_add(&x->bysrc, xfrm_state_bysrc+h); + hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); xfrm_state_hold(x); if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) { h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); - list_add(&x->byspi, xfrm_state_byspi+h); + hlist_add_head(&x->byspi, xfrm_state_byspi+h); xfrm_state_hold(x); } @@ -648,9 
+653,10 @@ EXPORT_SYMBOL(xfrm_state_insert); static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) { unsigned int h = xfrm_dst_hash(daddr, family); + struct hlist_node *entry; struct xfrm_state *x; - list_for_each_entry(x, xfrm_state_bydst+h, bydst) { + hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { if (x->props.reqid != reqid || x->props.mode != mode || x->props.family != family || @@ -717,10 +723,10 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; add_timer(&x->timer); xfrm_state_hold(x); - list_add_tail(&x->bydst, xfrm_state_bydst+h); + hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(saddr, family); xfrm_state_hold(x); - list_add_tail(&x->bysrc, xfrm_state_bysrc+h); + hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); wake_up(&km_waitq); } @@ -977,11 +983,14 @@ EXPORT_SYMBOL(xfrm_state_sort); static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) { int i; - struct xfrm_state *x; for (i = 0; i < XFRM_DST_HSIZE; i++) { - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { - if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) { + struct hlist_node *entry; + struct xfrm_state *x; + + hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { + if (x->km.seq == seq && + x->km.state == XFRM_STATE_ACQ) { xfrm_state_hold(x); return x; } @@ -1047,7 +1056,7 @@ xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) if (x->id.spi) { spin_lock_bh(&xfrm_state_lock); h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); - list_add(&x->byspi, xfrm_state_byspi+h); + hlist_add_head(&x->byspi, xfrm_state_byspi+h); xfrm_state_hold(x); spin_unlock_bh(&xfrm_state_lock); wake_up(&km_waitq); @@ -1060,12 +1069,13 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), { int i; struct xfrm_state *x; + struct hlist_node *entry; 
int count = 0; int err = 0; spin_lock_bh(&xfrm_state_lock); for (i = 0; i < XFRM_DST_HSIZE; i++) { - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { + hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { if (xfrm_id_proto_match(x->id.proto, proto)) count++; } @@ -1076,7 +1086,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), } for (i = 0; i < XFRM_DST_HSIZE; i++) { - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { + hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { if (!xfrm_id_proto_match(x->id.proto, proto)) continue; err = func(x, --count, data); @@ -1524,9 +1534,9 @@ void __init xfrm_state_init(void) int i; for (i=0; i Date: Thu, 24 Aug 2006 03:08:07 -0700 Subject: [XFRM]: Dynamic xfrm_state hash table sizing. The grow algorithm is simple, we grow if: 1) we see a hash chain collision at insert, and 2) we haven't hit the hash size limit (currently 1*1024*1024 slots), and 3) the number of xfrm_state objects is > the current hash mask All of this needs some tweaking. Remove __initdata from "hashdist" so we can use it safely at run time. Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 247 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 195 insertions(+), 52 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index fe3c8c3..445263c 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -18,6 +18,9 @@ #include #include #include +#include +#include +#include #include struct sock *xfrm_nl; @@ -38,102 +41,230 @@ EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth); static DEFINE_SPINLOCK(xfrm_state_lock); -#define XFRM_DST_HSIZE 1024 - /* Hash table to find appropriate SA towards given target (endpoint * of tunnel or destination of transport mode) allowed by selector. * * Main use is finding SA after policy selected tunnel or transport mode. * Also, it can be used by ah/esp icmp error handler to find offending SA. 
*/ -static struct hlist_head xfrm_state_bydst[XFRM_DST_HSIZE]; -static struct hlist_head xfrm_state_bysrc[XFRM_DST_HSIZE]; -static struct hlist_head xfrm_state_byspi[XFRM_DST_HSIZE]; - -static __inline__ -unsigned __xfrm4_dst_hash(xfrm_address_t *addr) +static struct hlist_head *xfrm_state_bydst __read_mostly; +static struct hlist_head *xfrm_state_bysrc __read_mostly; +static struct hlist_head *xfrm_state_byspi __read_mostly; +static unsigned int xfrm_state_hmask __read_mostly; +static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; +static unsigned int xfrm_state_num; + +static inline unsigned int __xfrm4_dst_hash(xfrm_address_t *addr, unsigned int hmask) { - unsigned h; + unsigned int h; h = ntohl(addr->a4); - h = (h ^ (h>>16)) % XFRM_DST_HSIZE; + h = (h ^ (h>>16)) & hmask; return h; } -static __inline__ -unsigned __xfrm6_dst_hash(xfrm_address_t *addr) +static inline unsigned int __xfrm6_dst_hash(xfrm_address_t *addr, unsigned int hmask) { - unsigned h; + unsigned int h; h = ntohl(addr->a6[2]^addr->a6[3]); - h = (h ^ (h>>16)) % XFRM_DST_HSIZE; + h = (h ^ (h>>16)) & hmask; return h; } -static __inline__ -unsigned __xfrm4_src_hash(xfrm_address_t *addr) +static inline unsigned int __xfrm4_src_hash(xfrm_address_t *addr, unsigned int hmask) { - return __xfrm4_dst_hash(addr); + return __xfrm4_dst_hash(addr, hmask); } -static __inline__ -unsigned __xfrm6_src_hash(xfrm_address_t *addr) +static inline unsigned int __xfrm6_src_hash(xfrm_address_t *addr, unsigned int hmask) { - return __xfrm6_dst_hash(addr); + return __xfrm6_dst_hash(addr, hmask); } -static __inline__ -unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) +static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask) { switch (family) { case AF_INET: - return __xfrm4_src_hash(addr); + return __xfrm4_src_hash(addr, hmask); case AF_INET6: - return __xfrm6_src_hash(addr); + return __xfrm6_src_hash(addr, hmask); } return 0; } -static 
__inline__ -unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) +static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) +{ + return __xfrm_src_hash(addr, family, xfrm_state_hmask); +} + +static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask) { switch (family) { case AF_INET: - return __xfrm4_dst_hash(addr); + return __xfrm4_dst_hash(addr, hmask); case AF_INET6: - return __xfrm6_dst_hash(addr); + return __xfrm6_dst_hash(addr, hmask); } return 0; } -static __inline__ -unsigned __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) +static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) +{ + return __xfrm_dst_hash(addr, family, xfrm_state_hmask); +} + +static inline unsigned int __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, + unsigned int hmask) { - unsigned h; + unsigned int h; h = ntohl(addr->a4^spi^proto); - h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; + h = (h ^ (h>>10) ^ (h>>20)) & hmask; return h; } -static __inline__ -unsigned __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) +static inline unsigned int __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, + unsigned int hmask) { - unsigned h; + unsigned int h; h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto); - h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; + h = (h ^ (h>>10) ^ (h>>20)) & hmask; return h; } -static __inline__ -unsigned xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family) +static inline +unsigned __xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family, + unsigned int hmask) { switch (family) { case AF_INET: - return __xfrm4_spi_hash(addr, spi, proto); + return __xfrm4_spi_hash(addr, spi, proto, hmask); case AF_INET6: - return __xfrm6_spi_hash(addr, spi, proto); + return __xfrm6_spi_hash(addr, spi, proto, hmask); } return 0; /*XXX*/ } +static inline unsigned int +xfrm_spi_hash(xfrm_address_t *addr, 
u32 spi, u8 proto, unsigned short family) +{ + return __xfrm_spi_hash(addr, spi, proto, family, xfrm_state_hmask); +} + +static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz) +{ + struct hlist_head *n; + + if (sz <= PAGE_SIZE) + n = kmalloc(sz, GFP_KERNEL); + else if (hashdist) + n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL); + else + n = (struct hlist_head *) + __get_free_pages(GFP_KERNEL, get_order(sz)); + + if (n) + memset(n, 0, sz); + + return n; +} + +static void xfrm_state_hash_free(struct hlist_head *n, unsigned int sz) +{ + if (sz <= PAGE_SIZE) + kfree(n); + else if (hashdist) + vfree(n); + else + free_pages((unsigned long)n, get_order(sz)); +} + +static void xfrm_hash_transfer(struct hlist_head *list, + struct hlist_head *ndsttable, + struct hlist_head *nsrctable, + struct hlist_head *nspitable, + unsigned int nhashmask) +{ + struct hlist_node *entry, *tmp; + struct xfrm_state *x; + + hlist_for_each_entry_safe(x, entry, tmp, list, bydst) { + unsigned int h; + + h = __xfrm_dst_hash(&x->id.daddr, x->props.family, nhashmask); + hlist_add_head(&x->bydst, ndsttable+h); + + h = __xfrm_src_hash(&x->props.saddr, x->props.family, + nhashmask); + hlist_add_head(&x->bysrc, nsrctable+h); + + h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, + x->props.family, nhashmask); + hlist_add_head(&x->byspi, nspitable+h); + } +} + +static unsigned long xfrm_hash_new_size(void) +{ + return ((xfrm_state_hmask + 1) << 1) * + sizeof(struct hlist_head); +} + +static DEFINE_MUTEX(hash_resize_mutex); + +static void xfrm_hash_resize(void *__unused) +{ + struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi; + unsigned long nsize, osize; + unsigned int nhashmask, ohashmask; + int i; + + mutex_lock(&hash_resize_mutex); + + nsize = xfrm_hash_new_size(); + ndst = xfrm_state_hash_alloc(nsize); + if (!ndst) + goto out_unlock; + nsrc = xfrm_state_hash_alloc(nsize); + if (!nsrc) { + xfrm_state_hash_free(ndst, nsize); + goto out_unlock; + } + nspi = 
xfrm_state_hash_alloc(nsize); + if (!nspi) { + xfrm_state_hash_free(ndst, nsize); + xfrm_state_hash_free(nsrc, nsize); + goto out_unlock; + } + + spin_lock_bh(&xfrm_state_lock); + + nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; + for (i = xfrm_state_hmask; i >= 0; i--) + xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi, + nhashmask); + + odst = xfrm_state_bydst; + osrc = xfrm_state_bysrc; + ospi = xfrm_state_byspi; + ohashmask = xfrm_state_hmask; + + xfrm_state_bydst = ndst; + xfrm_state_bysrc = nsrc; + xfrm_state_byspi = nspi; + xfrm_state_hmask = nhashmask; + + spin_unlock_bh(&xfrm_state_lock); + + osize = (ohashmask + 1) * sizeof(struct hlist_head); + xfrm_state_hash_free(odst, osize); + xfrm_state_hash_free(osrc, osize); + xfrm_state_hash_free(ospi, osize); + +out_unlock: + mutex_unlock(&hash_resize_mutex); +} + +static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL); + DECLARE_WAIT_QUEUE_HEAD(km_waitq); EXPORT_SYMBOL(km_waitq); @@ -335,6 +466,7 @@ int __xfrm_state_delete(struct xfrm_state *x) hlist_del(&x->byspi); __xfrm_state_put(x); } + xfrm_state_num--; spin_unlock(&xfrm_state_lock); if (del_timer(&x->timer)) __xfrm_state_put(x); @@ -380,7 +512,7 @@ void xfrm_state_flush(u8 proto) int i; spin_lock_bh(&xfrm_state_lock); - for (i = 0; i < XFRM_DST_HSIZE; i++) { + for (i = 0; i < xfrm_state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; restart: @@ -611,7 +743,7 @@ out: static void __xfrm_state_insert(struct xfrm_state *x) { - unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family); + unsigned int h = xfrm_dst_hash(&x->id.daddr, x->props.family); hlist_add_head(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); @@ -637,6 +769,13 @@ static void __xfrm_state_insert(struct xfrm_state *x) xfrm_state_hold(x); wake_up(&km_waitq); + + xfrm_state_num++; + + if (x->bydst.next != NULL && + (xfrm_state_hmask + 1) < xfrm_state_hashmax && + xfrm_state_num > xfrm_state_hmask) + schedule_work(&xfrm_hash_work); } void 
xfrm_state_insert(struct xfrm_state *x) @@ -984,7 +1123,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) { int i; - for (i = 0; i < XFRM_DST_HSIZE; i++) { + for (i = 0; i <= xfrm_state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; @@ -1026,7 +1165,7 @@ EXPORT_SYMBOL(xfrm_get_acqseq); void xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) { - u32 h; + unsigned int h; struct xfrm_state *x0; if (x->id.spi) @@ -1074,7 +1213,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), int err = 0; spin_lock_bh(&xfrm_state_lock); - for (i = 0; i < XFRM_DST_HSIZE; i++) { + for (i = 0; i <= xfrm_state_hmask; i++) { hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { if (xfrm_id_proto_match(x->id.proto, proto)) count++; @@ -1085,7 +1224,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), goto out; } - for (i = 0; i < XFRM_DST_HSIZE; i++) { + for (i = 0; i <= xfrm_state_hmask; i++) { hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { if (!xfrm_id_proto_match(x->id.proto, proto)) continue; @@ -1531,13 +1670,17 @@ EXPORT_SYMBOL(xfrm_init_state); void __init xfrm_state_init(void) { - int i; + unsigned int sz; + + sz = sizeof(struct hlist_head) * 8; + + xfrm_state_bydst = xfrm_state_hash_alloc(sz); + xfrm_state_bysrc = xfrm_state_hash_alloc(sz); + xfrm_state_byspi = xfrm_state_hash_alloc(sz); + if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi) + panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes."); + xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1); - for (i=0; i Date: Thu, 24 Aug 2006 03:18:09 -0700 Subject: [XFRM]: Add generation count to xfrm_state and xfrm_dst. Each xfrm_state inserted gets a new generation counter value. When a bundle is created, the xfrm_dst objects get the current generation counter of the xfrm_state they will attach to at dst->xfrm. 
xfrm_bundle_ok() will return false if it sees an xfrm_dst with a generation count different from the generation count of the xfrm_state that dst points to. This provides a facility by which to passively and cheaply invalidate cached IPSEC routes during SA database changes. Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 445263c..535d43c 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -53,6 +53,7 @@ static struct hlist_head *xfrm_state_byspi __read_mostly; static unsigned int xfrm_state_hmask __read_mostly; static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static unsigned int xfrm_state_num; +static unsigned int xfrm_state_genid; static inline unsigned int __xfrm4_dst_hash(xfrm_address_t *addr, unsigned int hmask) { @@ -745,6 +746,8 @@ static void __xfrm_state_insert(struct xfrm_state *x) { unsigned int h = xfrm_dst_hash(&x->id.daddr, x->props.family); + x->genid = ++xfrm_state_genid; + hlist_add_head(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); -- cgit v1.1 From a624c108e5595b5827796c253481436929cd5344 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 03:24:33 -0700 Subject: [XFRM]: Put more keys into destination hash function. Besides the daddr, key the hash on family and reqid too. Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 75 ++++++++++++++++++++++++--------------------------- 1 file changed, 35 insertions(+), 40 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 535d43c..7e5daaf 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -35,7 +35,7 @@ EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth); /* Each xfrm_state may be linked to two tables: 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl) - 2. 
Hash table by daddr to find what SAs exist for given + 2. Hash table by (daddr,family,reqid) to find what SAs exist for given destination/tunnel endpoint. (output) */ @@ -55,62 +55,56 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static unsigned int xfrm_state_num; static unsigned int xfrm_state_genid; -static inline unsigned int __xfrm4_dst_hash(xfrm_address_t *addr, unsigned int hmask) +static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr) { - unsigned int h; - h = ntohl(addr->a4); - h = (h ^ (h>>16)) & hmask; - return h; -} - -static inline unsigned int __xfrm6_dst_hash(xfrm_address_t *addr, unsigned int hmask) -{ - unsigned int h; - h = ntohl(addr->a6[2]^addr->a6[3]); - h = (h ^ (h>>16)) & hmask; - return h; + return ntohl(addr->a4); } -static inline unsigned int __xfrm4_src_hash(xfrm_address_t *addr, unsigned int hmask) +static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) { - return __xfrm4_dst_hash(addr, hmask); + return ntohl(addr->a6[2]^addr->a6[3]); } -static inline unsigned int __xfrm6_src_hash(xfrm_address_t *addr, unsigned int hmask) -{ - return __xfrm6_dst_hash(addr, hmask); -} - -static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask) +static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr, + u32 reqid, unsigned short family, + unsigned int hmask) { + unsigned int h = family ^ reqid; switch (family) { case AF_INET: - return __xfrm4_src_hash(addr, hmask); + h ^= __xfrm4_addr_hash(addr); + break; case AF_INET6: - return __xfrm6_src_hash(addr, hmask); - } - return 0; + h ^= __xfrm6_addr_hash(addr); + break; + }; + return (h ^ (h >> 16)) & hmask; } -static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) +static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, u32 reqid, + unsigned short family) { - return __xfrm_src_hash(addr, family, xfrm_state_hmask); + return __xfrm_dst_hash(addr, reqid, family, 
xfrm_state_hmask); } -static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask) +static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family, + unsigned int hmask) { + unsigned int h = family; switch (family) { case AF_INET: - return __xfrm4_dst_hash(addr, hmask); + h ^= __xfrm4_addr_hash(addr); + break; case AF_INET6: - return __xfrm6_dst_hash(addr, hmask); - } - return 0; + h ^= __xfrm6_addr_hash(addr); + break; + }; + return (h ^ (h >> 16)) & hmask; } -static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) +static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) { - return __xfrm_dst_hash(addr, family, xfrm_state_hmask); + return __xfrm_src_hash(addr, family, xfrm_state_hmask); } static inline unsigned int __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, @@ -190,7 +184,8 @@ static void xfrm_hash_transfer(struct hlist_head *list, hlist_for_each_entry_safe(x, entry, tmp, list, bydst) { unsigned int h; - h = __xfrm_dst_hash(&x->id.daddr, x->props.family, nhashmask); + h = __xfrm_dst_hash(&x->id.daddr, x->props.reqid, + x->props.family, nhashmask); hlist_add_head(&x->bydst, ndsttable+h); h = __xfrm_src_hash(&x->props.saddr, x->props.family, @@ -635,7 +630,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct xfrm_policy *pol, int *err, unsigned short family) { - unsigned int h = xfrm_dst_hash(daddr, family); + unsigned int h = xfrm_dst_hash(daddr, tmpl->reqid, family); struct hlist_node *entry; struct xfrm_state *x, *x0; int acquire_in_progress = 0; @@ -744,15 +739,15 @@ out: static void __xfrm_state_insert(struct xfrm_state *x) { - unsigned int h = xfrm_dst_hash(&x->id.daddr, x->props.family); + unsigned int h; x->genid = ++xfrm_state_genid; + h = xfrm_dst_hash(&x->id.daddr, x->props.reqid, x->props.family); hlist_add_head(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); h = xfrm_src_hash(&x->props.saddr, 
x->props.family); - hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); xfrm_state_hold(x); @@ -794,7 +789,7 @@ EXPORT_SYMBOL(xfrm_state_insert); /* xfrm_state_lock is held */ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) { - unsigned int h = xfrm_dst_hash(daddr, family); + unsigned int h = xfrm_dst_hash(daddr, reqid, family); struct hlist_node *entry; struct xfrm_state *x; -- cgit v1.1 From 2575b65434d56559bd03854450b9b6aaf19b9c90 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 03:26:44 -0700 Subject: [XFRM]: Simplify xfrm_spi_hash It can use __xfrm{4,6}_addr_hash(). Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 33 +++++++++------------------------ 1 file changed, 9 insertions(+), 24 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 7e5daaf..9820039 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -107,35 +107,20 @@ static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family return __xfrm_src_hash(addr, family, xfrm_state_hmask); } -static inline unsigned int __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, - unsigned int hmask) -{ - unsigned int h; - h = ntohl(addr->a4^spi^proto); - h = (h ^ (h>>10) ^ (h>>20)) & hmask; - return h; -} - -static inline unsigned int __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, - unsigned int hmask) -{ - unsigned int h; - h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto); - h = (h ^ (h>>10) ^ (h>>20)) & hmask; - return h; -} - -static inline -unsigned __xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family, - unsigned int hmask) +static inline unsigned int +__xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family, + unsigned int hmask) { + unsigned int h = spi ^ proto; switch (family) { case AF_INET: - return __xfrm4_spi_hash(addr, 
spi, proto, hmask); + h ^= __xfrm4_addr_hash(addr); + break; case AF_INET6: - return __xfrm6_spi_hash(addr, spi, proto, hmask); + h ^= __xfrm6_addr_hash(addr); + break; } - return 0; /*XXX*/ + return (h ^ (h >> 10) ^ (h >> 20)) & hmask; } static inline unsigned int -- cgit v1.1 From c7f5ea3a4d1ae6b3b426e113358fdc57494bc754 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 03:29:04 -0700 Subject: [XFRM]: Do not flush all bundles on SA insert. Instead, simply set all potentially aliasing existing xfrm_state objects to have the current generation counter value. This will make routes get relooked up the next time an existing route mentioning these aliased xfrm_state objects gets used, via xfrm_dst_check(). Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 9820039..77ef796 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -761,13 +761,30 @@ static void __xfrm_state_insert(struct xfrm_state *x) schedule_work(&xfrm_hash_work); } +/* xfrm_state_lock is held */ +static void __xfrm_state_bump_genids(struct xfrm_state *xnew) +{ + unsigned short family = xnew->props.family; + u32 reqid = xnew->props.reqid; + struct xfrm_state *x; + struct hlist_node *entry; + unsigned int h; + + h = xfrm_dst_hash(&xnew->id.daddr, reqid, family); + hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + if (x->props.family == family && + x->props.reqid == reqid && + !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family)) + x->genid = xfrm_state_genid; + } +} + void xfrm_state_insert(struct xfrm_state *x) { spin_lock_bh(&xfrm_state_lock); + __xfrm_state_bump_genids(x); __xfrm_state_insert(x); spin_unlock_bh(&xfrm_state_lock); - - xfrm_flush_all_bundles(); } EXPORT_SYMBOL(xfrm_state_insert); @@ -889,15 +906,13 @@ int xfrm_state_add(struct xfrm_state *x) x->id.proto, 
&x->id.daddr, &x->props.saddr, 0); + __xfrm_state_bump_genids(x); __xfrm_state_insert(x); err = 0; out: spin_unlock_bh(&xfrm_state_lock); - if (!err) - xfrm_flush_all_bundles(); - if (x1) { xfrm_state_delete(x1); xfrm_state_put(x1); -- cgit v1.1 From 1c0953997567b22e32fdf85d3b4bc0f2461fd161 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 03:30:28 -0700 Subject: [XFRM]: Purge dst references to deleted SAs passively. Just let GC and other normal mechanisms take care of getting rid of DST cache references to deleted xfrm_state objects instead of walking all the policy bundles. Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 77ef796..9ff00b7 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -256,8 +256,6 @@ static struct work_struct xfrm_state_gc_work; static HLIST_HEAD(xfrm_state_gc_list); static DEFINE_SPINLOCK(xfrm_state_gc_lock); -static int xfrm_state_gc_flush_bundles; - int __xfrm_state_delete(struct xfrm_state *x); static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family); @@ -293,11 +291,6 @@ static void xfrm_state_gc_task(void *data) struct hlist_node *entry, *tmp; struct hlist_head gc_list; - if (xfrm_state_gc_flush_bundles) { - xfrm_state_gc_flush_bundles = 0; - xfrm_flush_bundles(); - } - spin_lock_bh(&xfrm_state_gc_lock); gc_list.first = xfrm_state_gc_list.first; INIT_HLIST_HEAD(&xfrm_state_gc_list); @@ -454,16 +447,6 @@ int __xfrm_state_delete(struct xfrm_state *x) if (del_timer(&x->rtimer)) __xfrm_state_put(x); - /* The number two in this test is the reference - * mentioned in the comment below plus the reference - * our caller holds. A larger value means that - * there are DSTs attached to this xfrm_state. 
- */ - if (atomic_read(&x->refcnt) > 2) { - xfrm_state_gc_flush_bundles = 1; - schedule_work(&xfrm_state_gc_work); - } - /* All xfrm_state objects are created by xfrm_state_alloc. * The xfrm_state_alloc call gives a reference, and that * is what we are dropping here. -- cgit v1.1 From a47f0ce05ae12ce9acad62896ff703175764104e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 03:54:22 -0700 Subject: [XFRM]: Kill excessive refcounting of xfrm_state objects. The refcounting done for timers and hash table insertions is just wasted cycles. We can eliminate all of this refcounting because: 1) The implicit refcount when the xfrm_state object is active will always be held while the object is in the hash tables. We never kfree() the xfrm_state until long after we've made sure that it has been unhashed. 2) Timers are even easier. Once we mark x->km.state as anything other than XFRM_STATE_VALID (__xfrm_state_delete sets it to XFRM_STATE_DEAD), any timer that fires will do nothing and return without rearming the timer. Therefore we can defer the del_timer calls until the object is about to be freed up during GC. We have to use del_timer_sync() and defer it to GC because we can't do a del_timer_sync() while holding x->lock which all callers of __xfrm_state_delete hold. This makes SA changes even more light-weight. Signed-off-by: David S.
Miller --- net/xfrm/xfrm_state.c | 53 ++++++++++++--------------------------------------- 1 file changed, 12 insertions(+), 41 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 9ff00b7..0bc6a4b 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -266,10 +266,8 @@ void km_state_expired(struct xfrm_state *x, int hard, u32 pid); static void xfrm_state_gc_destroy(struct xfrm_state *x) { - if (del_timer(&x->timer)) - BUG(); - if (del_timer(&x->rtimer)) - BUG(); + del_timer_sync(&x->timer); + del_timer_sync(&x->rtimer); kfree(x->aalg); kfree(x->ealg); kfree(x->calg); @@ -361,9 +359,9 @@ static void xfrm_timer_handler(unsigned long data) if (warn) km_state_expired(x, 0, 0); resched: - if (next != LONG_MAX && - !mod_timer(&x->timer, jiffies + make_jiffies(next))) - xfrm_state_hold(x); + if (next != LONG_MAX) + mod_timer(&x->timer, jiffies + make_jiffies(next)); + goto out; expired: @@ -378,7 +376,6 @@ expired: out: spin_unlock(&x->lock); - xfrm_state_put(x); } static void xfrm_replay_timer_handler(unsigned long data); @@ -433,19 +430,11 @@ int __xfrm_state_delete(struct xfrm_state *x) x->km.state = XFRM_STATE_DEAD; spin_lock(&xfrm_state_lock); hlist_del(&x->bydst); - __xfrm_state_put(x); hlist_del(&x->bysrc); - __xfrm_state_put(x); - if (x->id.spi) { + if (x->id.spi) hlist_del(&x->byspi); - __xfrm_state_put(x); - } xfrm_state_num--; spin_unlock(&xfrm_state_lock); - if (del_timer(&x->timer)) - __xfrm_state_put(x); - if (del_timer(&x->rtimer)) - __xfrm_state_put(x); /* All xfrm_state objects are created by xfrm_state_alloc. 
* The xfrm_state_alloc call gives a reference, and that @@ -676,17 +665,13 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; hlist_add_head(&x->bydst, xfrm_state_bydst+h); - xfrm_state_hold(x); h = xfrm_src_hash(saddr, family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); - xfrm_state_hold(x); if (x->id.spi) { h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); hlist_add_head(&x->byspi, xfrm_state_byspi+h); - xfrm_state_hold(x); } x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; - xfrm_state_hold(x); x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; add_timer(&x->timer); } else { @@ -713,26 +698,20 @@ static void __xfrm_state_insert(struct xfrm_state *x) h = xfrm_dst_hash(&x->id.daddr, x->props.reqid, x->props.family); hlist_add_head(&x->bydst, xfrm_state_bydst+h); - xfrm_state_hold(x); h = xfrm_src_hash(&x->props.saddr, x->props.family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); - xfrm_state_hold(x); if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) { h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); hlist_add_head(&x->byspi, xfrm_state_byspi+h); - xfrm_state_hold(x); } - if (!mod_timer(&x->timer, jiffies + HZ)) - xfrm_state_hold(x); - - if (x->replay_maxage && - !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) - xfrm_state_hold(x); + mod_timer(&x->timer, jiffies + HZ); + if (x->replay_maxage) + mod_timer(&x->rtimer, jiffies + x->replay_maxage); wake_up(&km_waitq); @@ -844,10 +823,8 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re xfrm_state_hold(x); x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; add_timer(&x->timer); - xfrm_state_hold(x); hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(saddr, family); - xfrm_state_hold(x); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); wake_up(&km_waitq); } @@ -955,8 +932,7 @@ out: memcpy(&x1->lft, &x->lft, sizeof(x1->lft)); x1->km.dying = 0; 
- if (!mod_timer(&x1->timer, jiffies + HZ)) - xfrm_state_hold(x1); + mod_timer(&x1->timer, jiffies + HZ); if (x1->curlft.use_time) xfrm_state_check_expire(x1); @@ -981,8 +957,7 @@ int xfrm_state_check_expire(struct xfrm_state *x) if (x->curlft.bytes >= x->lft.hard_byte_limit || x->curlft.packets >= x->lft.hard_packet_limit) { x->km.state = XFRM_STATE_EXPIRED; - if (!mod_timer(&x->timer, jiffies)) - xfrm_state_hold(x); + mod_timer(&x->timer, jiffies); return -EINVAL; } @@ -1177,7 +1152,6 @@ xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) spin_lock_bh(&xfrm_state_lock); h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); hlist_add_head(&x->byspi, xfrm_state_byspi+h); - xfrm_state_hold(x); spin_unlock_bh(&xfrm_state_lock); wake_up(&km_waitq); } @@ -1264,10 +1238,8 @@ void xfrm_replay_notify(struct xfrm_state *x, int event) km_state_notify(x, &c); if (x->replay_maxage && - !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) { - xfrm_state_hold(x); + !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) x->xflags &= ~XFRM_TIME_DEFER; - } } EXPORT_SYMBOL(xfrm_replay_notify); @@ -1285,7 +1257,6 @@ static void xfrm_replay_timer_handler(unsigned long data) } spin_unlock(&x->lock); - xfrm_state_put(x); } int xfrm_replay_check(struct xfrm_state *x, u32 seq) -- cgit v1.1 From c1969f294e624d5b642fc8e6ab9468b7c7791fa8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 04:00:03 -0700 Subject: [XFRM]: Hash xfrm_state objects by source address too. The source address is always non-prefixed so we should use it to help give entropy to the bydst hash. Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_state.c | 53 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 18 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 0bc6a4b..37213f9 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -65,26 +65,40 @@ static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) return ntohl(addr->a6[2]^addr->a6[3]); } -static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr, +static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) +{ + return ntohl(daddr->a4 ^ saddr->a4); +} + +static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) +{ + return ntohl(daddr->a6[2] ^ daddr->a6[3] ^ + saddr->a6[2] ^ saddr->a6[3]); +} + +static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, + xfrm_address_t *saddr, u32 reqid, unsigned short family, unsigned int hmask) { unsigned int h = family ^ reqid; switch (family) { case AF_INET: - h ^= __xfrm4_addr_hash(addr); + h ^= __xfrm4_daddr_saddr_hash(daddr, saddr); break; case AF_INET6: - h ^= __xfrm6_addr_hash(addr); + h ^= __xfrm6_daddr_saddr_hash(daddr, saddr); break; }; return (h ^ (h >> 16)) & hmask; } -static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, u32 reqid, +static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr, + xfrm_address_t *saddr, + u32 reqid, unsigned short family) { - return __xfrm_dst_hash(addr, reqid, family, xfrm_state_hmask); + return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask); } static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family, @@ -108,25 +122,25 @@ static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family } static inline unsigned int -__xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family, - unsigned int hmask) +__xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 
proto, + unsigned short family, unsigned int hmask) { unsigned int h = spi ^ proto; switch (family) { case AF_INET: - h ^= __xfrm4_addr_hash(addr); + h ^= __xfrm4_addr_hash(daddr); break; case AF_INET6: - h ^= __xfrm6_addr_hash(addr); + h ^= __xfrm6_addr_hash(daddr); break; } return (h ^ (h >> 10) ^ (h >> 20)) & hmask; } static inline unsigned int -xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family) +xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family) { - return __xfrm_spi_hash(addr, spi, proto, family, xfrm_state_hmask); + return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask); } static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz) @@ -169,8 +183,9 @@ static void xfrm_hash_transfer(struct hlist_head *list, hlist_for_each_entry_safe(x, entry, tmp, list, bydst) { unsigned int h; - h = __xfrm_dst_hash(&x->id.daddr, x->props.reqid, - x->props.family, nhashmask); + h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr, + x->props.reqid, x->props.family, + nhashmask); hlist_add_head(&x->bydst, ndsttable+h); h = __xfrm_src_hash(&x->props.saddr, x->props.family, @@ -587,7 +602,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct xfrm_policy *pol, int *err, unsigned short family) { - unsigned int h = xfrm_dst_hash(daddr, tmpl->reqid, family); + unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family); struct hlist_node *entry; struct xfrm_state *x, *x0; int acquire_in_progress = 0; @@ -696,7 +711,8 @@ static void __xfrm_state_insert(struct xfrm_state *x) x->genid = ++xfrm_state_genid; - h = xfrm_dst_hash(&x->id.daddr, x->props.reqid, x->props.family); + h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr, + x->props.reqid, x->props.family); hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(&x->props.saddr, x->props.family); @@ -732,11 +748,12 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) struct hlist_node *entry; unsigned int h; - h = 
xfrm_dst_hash(&xnew->id.daddr, reqid, family); + h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family); hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && - !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family)) + !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) && + !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family)) x->genid = xfrm_state_genid; } } @@ -753,7 +770,7 @@ EXPORT_SYMBOL(xfrm_state_insert); /* xfrm_state_lock is held */ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) { - unsigned int h = xfrm_dst_hash(daddr, reqid, family); + unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family); struct hlist_node *entry; struct xfrm_state *x; -- cgit v1.1 From 44e36b42a8378be1dcf7e6f8a1cb2710a8903387 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 04:50:50 -0700 Subject: [XFRM]: Extract common hashing code into xfrm_hash.[ch] Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_state.c | 128 +++++++------------------------------------------- 1 file changed, 16 insertions(+), 112 deletions(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 37213f9..4341795 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -18,11 +18,11 @@ #include #include #include -#include -#include #include #include +#include "xfrm_hash.h" + struct sock *xfrm_nl; EXPORT_SYMBOL(xfrm_nl); @@ -55,44 +55,6 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static unsigned int xfrm_state_num; static unsigned int xfrm_state_genid; -static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr) -{ - return ntohl(addr->a4); -} - -static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) -{ - return ntohl(addr->a6[2]^addr->a6[3]); -} - -static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) -{ - return ntohl(daddr->a4 ^ saddr->a4); -} - -static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) -{ - return ntohl(daddr->a6[2] ^ daddr->a6[3] ^ - saddr->a6[2] ^ saddr->a6[3]); -} - -static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, - xfrm_address_t *saddr, - u32 reqid, unsigned short family, - unsigned int hmask) -{ - unsigned int h = family ^ reqid; - switch (family) { - case AF_INET: - h ^= __xfrm4_daddr_saddr_hash(daddr, saddr); - break; - case AF_INET6: - h ^= __xfrm6_daddr_saddr_hash(daddr, saddr); - break; - }; - return (h ^ (h >> 16)) & hmask; -} - static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, u32 reqid, @@ -101,76 +63,18 @@ static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr, return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask); } -static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family, - unsigned int hmask) -{ - unsigned int h = family; - switch 
(family) { - case AF_INET: - h ^= __xfrm4_addr_hash(addr); - break; - case AF_INET6: - h ^= __xfrm6_addr_hash(addr); - break; - }; - return (h ^ (h >> 16)) & hmask; -} - -static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) +static inline unsigned int xfrm_src_hash(xfrm_address_t *addr, + unsigned short family) { return __xfrm_src_hash(addr, family, xfrm_state_hmask); } static inline unsigned int -__xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, - unsigned short family, unsigned int hmask) -{ - unsigned int h = spi ^ proto; - switch (family) { - case AF_INET: - h ^= __xfrm4_addr_hash(daddr); - break; - case AF_INET6: - h ^= __xfrm6_addr_hash(daddr); - break; - } - return (h ^ (h >> 10) ^ (h >> 20)) & hmask; -} - -static inline unsigned int xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family) { return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask); } -static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz) -{ - struct hlist_head *n; - - if (sz <= PAGE_SIZE) - n = kmalloc(sz, GFP_KERNEL); - else if (hashdist) - n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL); - else - n = (struct hlist_head *) - __get_free_pages(GFP_KERNEL, get_order(sz)); - - if (n) - memset(n, 0, sz); - - return n; -} - -static void xfrm_state_hash_free(struct hlist_head *n, unsigned int sz) -{ - if (sz <= PAGE_SIZE) - kfree(n); - else if (hashdist) - vfree(n); - else - free_pages((unsigned long)n, get_order(sz)); -} - static void xfrm_hash_transfer(struct hlist_head *list, struct hlist_head *ndsttable, struct hlist_head *nsrctable, @@ -216,18 +120,18 @@ static void xfrm_hash_resize(void *__unused) mutex_lock(&hash_resize_mutex); nsize = xfrm_hash_new_size(); - ndst = xfrm_state_hash_alloc(nsize); + ndst = xfrm_hash_alloc(nsize); if (!ndst) goto out_unlock; - nsrc = xfrm_state_hash_alloc(nsize); + nsrc = xfrm_hash_alloc(nsize); if (!nsrc) { - xfrm_state_hash_free(ndst, nsize); + xfrm_hash_free(ndst, nsize); goto 
out_unlock; } - nspi = xfrm_state_hash_alloc(nsize); + nspi = xfrm_hash_alloc(nsize); if (!nspi) { - xfrm_state_hash_free(ndst, nsize); - xfrm_state_hash_free(nsrc, nsize); + xfrm_hash_free(ndst, nsize); + xfrm_hash_free(nsrc, nsize); goto out_unlock; } @@ -251,9 +155,9 @@ static void xfrm_hash_resize(void *__unused) spin_unlock_bh(&xfrm_state_lock); osize = (ohashmask + 1) * sizeof(struct hlist_head); - xfrm_state_hash_free(odst, osize); - xfrm_state_hash_free(osrc, osize); - xfrm_state_hash_free(ospi, osize); + xfrm_hash_free(odst, osize); + xfrm_hash_free(osrc, osize); + xfrm_hash_free(ospi, osize); out_unlock: mutex_unlock(&hash_resize_mutex); @@ -1643,9 +1547,9 @@ void __init xfrm_state_init(void) sz = sizeof(struct hlist_head) * 8; - xfrm_state_bydst = xfrm_state_hash_alloc(sz); - xfrm_state_bysrc = xfrm_state_hash_alloc(sz); - xfrm_state_byspi = xfrm_state_hash_alloc(sz); + xfrm_state_bydst = xfrm_hash_alloc(sz); + xfrm_state_bysrc = xfrm_hash_alloc(sz); + xfrm_state_byspi = xfrm_hash_alloc(sz); if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi) panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes."); xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1); -- cgit v1.1 From a9917c06652165fe4eeb9ab7a5d1e0674e90e508 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Thu, 31 Aug 2006 15:14:32 -0700 Subject: [XFRM] STATE: Fix flushing with hash mask. This is a minor fix about transformation state flushing for net-2.6.19. Please apply it. Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/xfrm/xfrm_state.c') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 4341795..9f63edd 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -384,7 +384,7 @@ void xfrm_state_flush(u8 proto) int i; spin_lock_bh(&xfrm_state_lock); - for (i = 0; i < xfrm_state_hmask; i++) { + for (i = 0; i <= xfrm_state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; restart: -- cgit v1.1