From e5fc9e7a666e5964b60e05903b90aa832354b68c Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Fri, 12 Nov 2010 17:33:17 +0100 Subject: netfilter: nf_conntrack: don't always initialize ct->proto ct->proto is big (60 bytes) due to structure ip_ct_tcp, and we don't need to initialize the whole of it for all the other protocols. This patch moves proto to the end of structure nf_conn, and pushes the initialization down to the individual protocols. Signed-off-by: Changli Gao Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_core.c | 3 ++- net/netfilter/nf_conntrack_netlink.c | 1 + net/netfilter/nf_conntrack_proto_dccp.c | 3 +++ net/netfilter/nf_conntrack_proto_sctp.c | 1 + net/netfilter/nf_conntrack_proto_tcp.c | 14 +++----------- 5 files changed, 10 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 27a5ea6..0ba7d48 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -651,7 +651,8 @@ __nf_conntrack_alloc(struct net *net, u16 zone, * and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged. 
*/ memset(&ct->tuplehash[IP_CT_DIR_MAX], 0, - sizeof(*ct) - offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX])); + offsetof(struct nf_conn, proto) - + offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX])); spin_lock_init(&ct->lock); ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index b729ace..7f59be8 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1375,6 +1375,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, } #endif + memset(&ct->proto, 0, sizeof(ct->proto)); if (cda[CTA_PROTOINFO]) { err = ctnetlink_change_protoinfo(ct, cda); if (err < 0) diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 5292560..9ae57c5 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -452,6 +452,9 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; ct->proto.dccp.state = CT_DCCP_NONE; + ct->proto.dccp.last_pkt = DCCP_PKT_REQUEST; + ct->proto.dccp.last_dir = IP_CT_DIR_ORIGINAL; + ct->proto.dccp.handshake_seq = 0; return true; out_invalid: diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index c6049c2..6f4ee70 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -413,6 +413,7 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb, test_bit(SCTP_CID_COOKIE_ACK, map)) return false; + memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp)); new_state = SCTP_CONNTRACK_MAX; for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { /* Don't need lock here: this conntrack not in circulation yet */ diff --git a/net/netfilter/nf_conntrack_proto_tcp.c 
b/net/netfilter/nf_conntrack_proto_tcp.c index 3fb2b73..6f38d0e 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1066,9 +1066,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, BUG_ON(th == NULL); /* Don't need lock here: this conntrack not in circulation yet */ - new_state - = tcp_conntracks[0][get_conntrack_index(th)] - [TCP_CONNTRACK_NONE]; + new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE]; /* Invalid: delete conntrack */ if (new_state >= TCP_CONNTRACK_MAX) { @@ -1077,6 +1075,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, } if (new_state == TCP_CONNTRACK_SYN_SENT) { + memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp)); /* SYN packet */ ct->proto.tcp.seen[0].td_end = segment_seq_plus_len(ntohl(th->seq), skb->len, @@ -1088,11 +1087,11 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.tcp.seen[0].td_end; tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]); - ct->proto.tcp.seen[1].flags = 0; } else if (nf_ct_tcp_loose == 0) { /* Don't try to pick up connections. */ return false; } else { + memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp)); /* * We are in the middle of a connection, * its history is lost for us. 
@@ -1107,7 +1106,6 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.tcp.seen[0].td_maxend = ct->proto.tcp.seen[0].td_end + ct->proto.tcp.seen[0].td_maxwin; - ct->proto.tcp.seen[0].td_scale = 0; /* We assume SACK and liberal window checking to handle * window scaling */ @@ -1116,13 +1114,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, IP_CT_TCP_FLAG_BE_LIBERAL; } - ct->proto.tcp.seen[1].td_end = 0; - ct->proto.tcp.seen[1].td_maxend = 0; - ct->proto.tcp.seen[1].td_maxwin = 0; - ct->proto.tcp.seen[1].td_scale = 0; - /* tcp_packet will set them */ - ct->proto.tcp.state = TCP_CONNTRACK_NONE; ct->proto.tcp.last_index = TCP_NONE_SET; pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i " -- cgit v1.1 From ca36181050a523f6c0af3ef7cb509bbbc4ede276 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Fri, 12 Nov 2010 17:34:17 +0100 Subject: netfilter: xt_NFQUEUE: remove modulo operations Signed-off-by: Changli Gao Acked-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/netfilter/xt_NFQUEUE.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 039cce1..3962770 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -72,10 +72,12 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) if (info->queues_total > 1) { if (par->family == NFPROTO_IPV4) - queue = hash_v4(skb) % info->queues_total + queue; + queue = (((u64) hash_v4(skb) * info->queues_total) >> + 32) + queue; #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) else if (par->family == NFPROTO_IPV6) - queue = hash_v6(skb) % info->queues_total + queue; + queue = (((u64) hash_v6(skb) * info->queues_total) >> + 32) + queue; #endif } return NF_QUEUE_NR(queue); -- cgit v1.1 From b468645d72c2b4a15512f0a18e77670ea058b861 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 15 Nov 2010 11:23:06 
+0100 Subject: netfilter: xt_LOG: do print MAC header on FORWARD I am observing consistent behavior even with bridges, so let's unlock this. xt_mac is already usable in FORWARD, too. Section 9 of http://ebtables.sourceforge.net/br_fw_ia/br_fw_ia.html#section9 says the MAC source address is changed, but my observation does not match that claim -- the MAC header is retained. Signed-off-by: Jan Engelhardt [Patrick; code inspection seems to confirm this] Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ipt_LOG.c | 3 +-- net/ipv6/netfilter/ip6t_LOG.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 72ffc8f..d76d6c9 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -442,8 +442,7 @@ ipt_log_packet(u_int8_t pf, } #endif - /* MAC logging for input path only. */ - if (in && !out) + if (in != NULL) dump_mac_header(m, loginfo, skb); dump_packet(m, loginfo, skb, 0); diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 09c8889..05027b7 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -452,8 +452,7 @@ ip6t_log_packet(u_int8_t pf, in ? in->name : "", out ? out->name : ""); - /* MAC logging for input path only. */ - if (in && !out) + if (in != NULL) dump_mac_header(m, loginfo, skb); dump_packet(m, loginfo, skb, skb_network_offset(skb), 1); -- cgit v1.1 From 3b2368806915e1e69ac3bcc0d6a7cfde64307655 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Mon, 15 Nov 2010 11:47:52 +0100 Subject: netfilter: ct_extend: fix the wrong alloc_size In function update_alloc_size(), sizeof(struct nf_ct_ext) is added twice wrongly. 
Signed-off-by: Changli Gao Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_extend.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index bd82450..920f924 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -144,9 +144,8 @@ static void update_alloc_size(struct nf_ct_ext_type *type) if (!t1) continue; - t1->alloc_size = sizeof(struct nf_ct_ext) - + ALIGN(sizeof(struct nf_ct_ext), t1->align) - + t1->len; + t1->alloc_size = ALIGN(sizeof(struct nf_ct_ext), t1->align) + + t1->len; for (j = 0; j < NF_CT_EXT_NUM; j++) { t2 = nf_ct_ext_types[j]; if (t2 == NULL || t2 == t1 || -- cgit v1.1 From 76a2d3bcfcc86e2a8044258515b86492a37631a3 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Mon, 15 Nov 2010 11:59:03 +0100 Subject: netfilter: nf_nat: don't use atomic bit operation As we own the conntrack and the others can't see it until we confirm it, we don't need to use atomic bit operation on ct->status. Signed-off-by: Changli Gao Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_nat_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index c04787c..ab877ac 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -323,9 +323,9 @@ nf_nat_setup_info(struct nf_conn *ct, /* It's done. 
*/ if (maniptype == IP_NAT_MANIP_DST) - set_bit(IPS_DST_NAT_DONE_BIT, &ct->status); + ct->status |= IPS_DST_NAT_DONE_BIT; else - set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status); + ct->status |= IPS_SRC_NAT_DONE_BIT; return NF_ACCEPT; } -- cgit v1.1 From 9811600f7c1f18152430c6b93b0a76fdd88a59ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=E9d=E9ric=20Leroy?= Date: Mon, 15 Nov 2010 13:57:56 +0100 Subject: netfilter: xt_CLASSIFY: add ARP support, allow CLASSIFY target on any table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Frédéric Leroy Signed-off-by: Patrick McHardy --- net/netfilter/xt_CLASSIFY.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c index c2c0e4a..af9c4da 100644 --- a/net/netfilter/xt_CLASSIFY.c +++ b/net/netfilter/xt_CLASSIFY.c @@ -19,12 +19,14 @@ #include #include #include +#include MODULE_AUTHOR("Patrick McHardy "); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Xtables: Qdisc classification"); MODULE_ALIAS("ipt_CLASSIFY"); MODULE_ALIAS("ip6t_CLASSIFY"); +MODULE_ALIAS("arpt_CLASSIFY"); static unsigned int classify_tg(struct sk_buff *skb, const struct xt_action_param *par) @@ -35,26 +37,36 @@ classify_tg(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } -static struct xt_target classify_tg_reg __read_mostly = { - .name = "CLASSIFY", - .revision = 0, - .family = NFPROTO_UNSPEC, - .table = "mangle", - .hooks = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | - (1 << NF_INET_POST_ROUTING), - .target = classify_tg, - .targetsize = sizeof(struct xt_classify_target_info), - .me = THIS_MODULE, +static struct xt_target classify_tg_reg[] __read_mostly = { + { + .name = "CLASSIFY", + .revision = 0, + .family = NFPROTO_UNSPEC, + .hooks = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | + (1 << NF_INET_POST_ROUTING), + .target = classify_tg, + 
.targetsize = sizeof(struct xt_classify_target_info), + .me = THIS_MODULE, + }, + { + .name = "CLASSIFY", + .revision = 0, + .family = NFPROTO_ARP, + .hooks = (1 << NF_ARP_OUT) | (1 << NF_ARP_FORWARD), + .target = classify_tg, + .targetsize = sizeof(struct xt_classify_target_info), + .me = THIS_MODULE, + }, }; static int __init classify_tg_init(void) { - return xt_register_target(&classify_tg_reg); + return xt_register_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg)); } static void __exit classify_tg_exit(void) { - xt_unregister_target(&classify_tg_reg); + xt_unregister_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg)); } module_init(classify_tg_init); -- cgit v1.1 From 0e60ebe04c51807db972d03665651ae6b5c26d7e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2010 18:17:21 +0100 Subject: netfilter: add __rcu annotations Add some __rcu annotations and use helpers to reduce number of sparse warnings (CONFIG_SPARSE_RCU_POINTER=y) Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/netfilter/core.c | 4 ++-- net/netfilter/nf_conntrack_expect.c | 6 +++--- net/netfilter/nf_conntrack_proto.c | 20 +++++++++++++++----- net/netfilter/nf_conntrack_standalone.c | 9 ++++++--- net/netfilter/nf_log.c | 6 ++++-- net/netfilter/nf_queue.c | 18 ++++++++++++++---- net/netfilter/nfnetlink_log.c | 6 +++--- 7 files changed, 47 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 85dabb8..5faec4f 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -212,7 +212,7 @@ EXPORT_SYMBOL(skb_make_writable); /* This does not belong here, but locally generated errors need it if connection tracking in use: without this, connection may not be in hash table, and hence manufactured ICMP or RST packets will not be associated with it. 
*/ -void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *); +void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu __read_mostly; EXPORT_SYMBOL(ip_ct_attach); void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) @@ -229,7 +229,7 @@ void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) } EXPORT_SYMBOL(nf_ct_attach); -void (*nf_ct_destroy)(struct nf_conntrack *); +void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly; EXPORT_SYMBOL(nf_ct_destroy); void nf_conntrack_destroy(struct nf_conntrack *nfct) diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 46e8966..cab196c 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -482,7 +482,7 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq) struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { - n = rcu_dereference(net->ct.expect_hash[st->bucket].first); + n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket])); if (n) return n; } @@ -495,11 +495,11 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq, struct net *net = seq_file_net(seq); struct ct_expect_iter_state *st = seq->private; - head = rcu_dereference(head->next); + head = rcu_dereference(hlist_next_rcu(head)); while (head == NULL) { if (++st->bucket >= nf_ct_expect_hsize) return NULL; - head = rcu_dereference(net->ct.expect_hash[st->bucket].first); + head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket])); } return head; } diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index dc7bb74..03b56a0 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -166,6 +166,7 @@ static void nf_ct_l3proto_unregister_sysctl(struct nf_conntrack_l3proto *l3proto int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) { int ret = 0; + struct nf_conntrack_l3proto *old; if 
(proto->l3proto >= AF_MAX) return -EBUSY; @@ -174,7 +175,9 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) return -EINVAL; mutex_lock(&nf_ct_proto_mutex); - if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) { + old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto], + lockdep_is_held(&nf_ct_proto_mutex)); + if (old != &nf_conntrack_l3proto_generic) { ret = -EBUSY; goto out_unlock; } @@ -201,7 +204,9 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto) BUG_ON(proto->l3proto >= AF_MAX); mutex_lock(&nf_ct_proto_mutex); - BUG_ON(nf_ct_l3protos[proto->l3proto] != proto); + BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto], + lockdep_is_held(&nf_ct_proto_mutex) + ) != proto); rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], &nf_conntrack_l3proto_generic); nf_ct_l3proto_unregister_sysctl(proto); @@ -299,8 +304,10 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto) smp_wmb(); nf_ct_protos[l4proto->l3proto] = proto_array; - } else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != - &nf_conntrack_l4proto_generic) { + } else if (rcu_dereference_protected( + nf_ct_protos[l4proto->l3proto][l4proto->l4proto], + lockdep_is_held(&nf_ct_proto_mutex) + ) != &nf_conntrack_l4proto_generic) { ret = -EBUSY; goto out_unlock; } @@ -331,7 +338,10 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) BUG_ON(l4proto->l3proto >= PF_MAX); mutex_lock(&nf_ct_proto_mutex); - BUG_ON(nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != l4proto); + BUG_ON(rcu_dereference_protected( + nf_ct_protos[l4proto->l3proto][l4proto->l4proto], + lockdep_is_held(&nf_ct_proto_mutex) + ) != l4proto); rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], &nf_conntrack_l4proto_generic); nf_ct_l4proto_unregister_sysctl(l4proto); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 0fb6570..328f1d2 100644 --- 
a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -29,6 +29,7 @@ #include #include #include +#include MODULE_LICENSE("GPL"); @@ -56,7 +57,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) for (st->bucket = 0; st->bucket < net->ct.htable_size; st->bucket++) { - n = rcu_dereference(net->ct.hash[st->bucket].first); + n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); if (!is_a_nulls(n)) return n; } @@ -69,13 +70,15 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, struct net *net = seq_file_net(seq); struct ct_iter_state *st = seq->private; - head = rcu_dereference(head->next); + head = rcu_dereference(hlist_nulls_next_rcu(head)); while (is_a_nulls(head)) { if (likely(get_nulls_value(head) == st->bucket)) { if (++st->bucket >= net->ct.htable_size) return NULL; } - head = rcu_dereference(net->ct.hash[st->bucket].first); + head = rcu_dereference( + hlist_nulls_first_rcu( + &net->ct.hash[st->bucket])); } return head; } diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index b07393e..20c775c 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -161,7 +161,8 @@ static int seq_show(struct seq_file *s, void *v) struct nf_logger *t; int ret; - logger = nf_loggers[*pos]; + logger = rcu_dereference_protected(nf_loggers[*pos], + lockdep_is_held(&nf_log_mutex)); if (!logger) ret = seq_printf(s, "%2lld NONE (", *pos); @@ -249,7 +250,8 @@ static int nf_log_proc_dostring(ctl_table *table, int write, mutex_unlock(&nf_log_mutex); } else { mutex_lock(&nf_log_mutex); - logger = nf_loggers[tindex]; + logger = rcu_dereference_protected(nf_loggers[tindex], + lockdep_is_held(&nf_log_mutex)); if (!logger) table->data = "NONE"; else diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 74aebed..1876f74 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -27,14 +27,17 @@ static DEFINE_MUTEX(queue_handler_mutex); int 
nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) { int ret; + const struct nf_queue_handler *old; if (pf >= ARRAY_SIZE(queue_handler)) return -EINVAL; mutex_lock(&queue_handler_mutex); - if (queue_handler[pf] == qh) + old = rcu_dereference_protected(queue_handler[pf], + lockdep_is_held(&queue_handler_mutex)); + if (old == qh) ret = -EEXIST; - else if (queue_handler[pf]) + else if (old) ret = -EBUSY; else { rcu_assign_pointer(queue_handler[pf], qh); @@ -49,11 +52,15 @@ EXPORT_SYMBOL(nf_register_queue_handler); /* The caller must flush their queue before this */ int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) { + const struct nf_queue_handler *old; + if (pf >= ARRAY_SIZE(queue_handler)) return -EINVAL; mutex_lock(&queue_handler_mutex); - if (queue_handler[pf] && queue_handler[pf] != qh) { + old = rcu_dereference_protected(queue_handler[pf], + lockdep_is_held(&queue_handler_mutex)); + if (old && old != qh) { mutex_unlock(&queue_handler_mutex); return -EINVAL; } @@ -73,7 +80,10 @@ void nf_unregister_queue_handlers(const struct nf_queue_handler *qh) mutex_lock(&queue_handler_mutex); for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++) { - if (queue_handler[pf] == qh) + if (rcu_dereference_protected( + queue_handler[pf], + lockdep_is_held(&queue_handler_mutex) + ) == qh) rcu_assign_pointer(queue_handler[pf], NULL); } mutex_unlock(&queue_handler_mutex); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 6a1572b..91592da 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -874,19 +874,19 @@ static struct hlist_node *get_first(struct iter_state *st) for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { if (!hlist_empty(&instance_table[st->bucket])) - return rcu_dereference_bh(instance_table[st->bucket].first); + return rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket])); } return NULL; } static struct hlist_node *get_next(struct 
iter_state *st, struct hlist_node *h) { - h = rcu_dereference_bh(h->next); + h = rcu_dereference_bh(hlist_next_rcu(h)); while (!h) { if (++st->bucket >= INSTANCE_BUCKETS) return NULL; - h = rcu_dereference_bh(instance_table[st->bucket].first); + h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket])); } return h; } -- cgit v1.1 From be9e9163afcfc3137e7c6377cb0c7b406318fde0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2010 18:18:29 +0100 Subject: netfilter: nf_ct_frag6_sysctl_table is static Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 3a3f129..eb9f1c0 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -73,7 +73,7 @@ static struct inet_frags nf_frags; static struct netns_frags nf_init_frags; #ifdef CONFIG_SYSCTL -struct ctl_table nf_ct_frag6_sysctl_table[] = { +static struct ctl_table nf_ct_frag6_sysctl_table[] = { { .procname = "nf_conntrack_frag6_timeout", .data = &nf_init_frags.timeout, -- cgit v1.1 From eb733162ae4f69b93f7c08012e6e239f31796de8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2010 18:43:59 +0100 Subject: netfilter: add __rcu annotations Use helpers to reduce number of sparse warnings (CONFIG_SPARSE_RCU_POINTER=y) Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 17 +++++++++++------ net/ipv4/netfilter/nf_nat_core.c | 5 ++++- 2 files changed, 15 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 37f8adb..ab9c05c 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c 
@@ -20,6 +20,7 @@ #include #include #include +#include struct ct_iter_state { struct seq_net_private p; @@ -35,7 +36,8 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) for (st->bucket = 0; st->bucket < net->ct.htable_size; st->bucket++) { - n = rcu_dereference(net->ct.hash[st->bucket].first); + n = rcu_dereference( + hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); if (!is_a_nulls(n)) return n; } @@ -48,13 +50,14 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, struct net *net = seq_file_net(seq); struct ct_iter_state *st = seq->private; - head = rcu_dereference(head->next); + head = rcu_dereference(hlist_nulls_next_rcu(head)); while (is_a_nulls(head)) { if (likely(get_nulls_value(head) == st->bucket)) { if (++st->bucket >= net->ct.htable_size) return NULL; } - head = rcu_dereference(net->ct.hash[st->bucket].first); + head = rcu_dereference( + hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); } return head; } @@ -217,7 +220,8 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq) struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { - n = rcu_dereference(net->ct.expect_hash[st->bucket].first); + n = rcu_dereference( + hlist_first_rcu(&net->ct.expect_hash[st->bucket])); if (n) return n; } @@ -230,11 +234,12 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq, struct net *net = seq_file_net(seq); struct ct_expect_iter_state *st = seq->private; - head = rcu_dereference(head->next); + head = rcu_dereference(hlist_next_rcu(head)); while (head == NULL) { if (++st->bucket >= nf_ct_expect_hsize) return NULL; - head = rcu_dereference(net->ct.expect_hash[st->bucket].first); + head = rcu_dereference( + hlist_first_rcu(&net->ct.expect_hash[st->bucket])); } return head; } diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index ab877ac..eb55835 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ 
-502,7 +502,10 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto) int ret = 0; spin_lock_bh(&nf_nat_lock); - if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) { + if (rcu_dereference_protected( + nf_nat_protos[proto->protonum], + lockdep_is_held(&nf_nat_lock) + ) != &nf_nat_unknown_protocol) { ret = -EBUSY; goto out; } -- cgit v1.1 From ab0cba25128e1435a59b1ec4ae0c7505548fed87 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2010 18:45:12 +0100 Subject: netfilter: nf_nat_amanda: rename a variable Avoid a sparse warning about 'ret' variable shadowing Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_nat_amanda.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c index 0f23b3f..703f366f 100644 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ b/net/ipv4/netfilter/nf_nat_amanda.c @@ -44,13 +44,13 @@ static unsigned int help(struct sk_buff *skb, /* Try to get same port: if not, try to change it. */ for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { - int ret; + int res; exp->tuple.dst.u.tcp.port = htons(port); - ret = nf_ct_expect_related(exp); - if (ret == 0) + res = nf_ct_expect_related(exp); + if (res == 0) break; - else if (ret != -EBUSY) { + else if (res != -EBUSY) { port = 0; break; } -- cgit v1.1 From c5d277d29ad1ae9add8d6984025ccd2e835971ce Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2010 19:45:13 +0100 Subject: netfilter: rcu sparse cleanups Use RCU helpers to reduce the number of sparse warnings (CONFIG_SPARSE_RCU_POINTER=y), and add lockdep checks. 
Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_expect.c | 15 ++++++++++++--- net/netfilter/nf_conntrack_extend.c | 6 ++++-- net/netfilter/nf_conntrack_helper.c | 10 ++++++++-- net/netfilter/nf_conntrack_proto.c | 4 ++-- 4 files changed, 26 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index cab196c..bbb2140 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -337,7 +337,10 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) setup_timer(&exp->timeout, nf_ct_expectation_timed_out, (unsigned long)exp); if (master_help) { - p = &master_help->helper->expect_policy[exp->class]; + p = &rcu_dereference_protected( + master_help->helper, + lockdep_is_held(&nf_conntrack_lock) + )->expect_policy[exp->class]; exp->timeout.expires = jiffies + p->timeout * HZ; } add_timer(&exp->timeout); @@ -373,7 +376,10 @@ static inline int refresh_timer(struct nf_conntrack_expect *i) if (!del_timer(&i->timeout)) return 0; - p = &master_help->helper->expect_policy[i->class]; + p = &rcu_dereference_protected( + master_help->helper, + lockdep_is_held(&nf_conntrack_lock) + )->expect_policy[i->class]; i->timeout.expires = jiffies + p->timeout * HZ; add_timer(&i->timeout); return 1; @@ -411,7 +417,10 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) } /* Will be over limit? 
*/ if (master_help) { - p = &master_help->helper->expect_policy[expect->class]; + p = &rcu_dereference_protected( + master_help->helper, + lockdep_is_held(&nf_conntrack_lock) + )->expect_policy[expect->class]; if (p->max_expected && master_help->expecting[expect->class] >= p->max_expected) { evict_oldest_expect(master, expect); diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 920f924..80a23ed 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -140,14 +140,16 @@ static void update_alloc_size(struct nf_ct_ext_type *type) /* This assumes that extended areas in conntrack for the types whose NF_CT_EXT_F_PREALLOC bit set are allocated in order */ for (i = min; i <= max; i++) { - t1 = nf_ct_ext_types[i]; + t1 = rcu_dereference_protected(nf_ct_ext_types[i], + lockdep_is_held(&nf_ct_ext_type_mutex)); if (!t1) continue; t1->alloc_size = ALIGN(sizeof(struct nf_ct_ext), t1->align) + t1->len; for (j = 0; j < NF_CT_EXT_NUM; j++) { - t2 = nf_ct_ext_types[j]; + t2 = rcu_dereference_protected(nf_ct_ext_types[j], + lockdep_is_held(&nf_ct_ext_type_mutex)); if (t2 == NULL || t2 == t1 || (t2->flags & NF_CT_EXT_F_PREALLOC) == 0) continue; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 59e1a4c..767bbe9 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -158,7 +158,10 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i, struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); struct nf_conn_help *help = nfct_help(ct); - if (help && help->helper == me) { + if (help && rcu_dereference_protected( + help->helper, + lockdep_is_held(&nf_conntrack_lock) + ) == me) { nf_conntrack_event(IPCT_HELPER, ct); rcu_assign_pointer(help->helper, NULL); } @@ -210,7 +213,10 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, hlist_for_each_entry_safe(exp, n, next, &net->ct.expect_hash[i], hnode) { struct 
nf_conn_help *help = nfct_help(exp->master); - if ((help->helper == me || exp->helper == me) && + if ((rcu_dereference_protected( + help->helper, + lockdep_is_held(&nf_conntrack_lock) + ) == me || exp->helper == me) && del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); nf_ct_expect_put(exp); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 03b56a0..5701c8d 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -284,7 +284,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto) mutex_lock(&nf_ct_proto_mutex); if (!nf_ct_protos[l4proto->l3proto]) { /* l3proto may be loaded latter. */ - struct nf_conntrack_l4proto **proto_array; + struct nf_conntrack_l4proto __rcu **proto_array; int i; proto_array = kmalloc(MAX_NF_CT_PROTO * @@ -296,7 +296,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto) } for (i = 0; i < MAX_NF_CT_PROTO; i++) - proto_array[i] = &nf_conntrack_l4proto_generic; + RCU_INIT_POINTER(proto_array[i], &nf_conntrack_l4proto_generic); /* Before making proto_array visible to lockless readers, * we must make sure its content is committed to memory. -- cgit v1.1 From e9e5eee8733739f13a204132b502494b3f494f3b Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 8 Nov 2010 20:05:57 +0900 Subject: IPVS: Add persistence engine to connection entry The dest of a connection may not exist if it has been created as the result of connection synchronisation. But in order for connection entries for templates with persistence engine data created through connection synchronisation to be valid, access to the persistence engine pointer is required. So add the persistence engine to the connection itself. 
Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_conn.c | 19 ++++++++++--------- net/netfilter/ipvs/ip_vs_ctl.c | 4 ++-- net/netfilter/ipvs/ip_vs_pe.c | 14 ++++---------- 3 files changed, 16 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index e9adecd..64a9ca3 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -176,8 +176,8 @@ static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport, NULL, 0, &p); - if (cp->dest && cp->dest->svc->pe) { - p.pe = cp->dest->svc->pe; + if (cp->pe) { + p.pe = cp->pe; p.pe_data = cp->pe_data; p.pe_data_len = cp->pe_data_len; } @@ -765,6 +765,7 @@ static void ip_vs_conn_expire(unsigned long data) if (cp->flags & IP_VS_CONN_F_NFCT) ip_vs_conn_drop_conntrack(cp); + ip_vs_pe_put(cp->pe); kfree(cp->pe_data); if (unlikely(cp->app != NULL)) ip_vs_unbind_app(cp); @@ -826,7 +827,9 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, &cp->daddr, daddr); cp->dport = dport; cp->flags = flags; - if (flags & IP_VS_CONN_F_TEMPLATE && p->pe_data) { + if (flags & IP_VS_CONN_F_TEMPLATE && p->pe) { + ip_vs_pe_get(p->pe); + cp->pe = p->pe; cp->pe_data = p->pe_data; cp->pe_data_len = p->pe_data_len; } @@ -958,15 +961,13 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3]; size_t len = 0; - if (cp->dest && cp->pe_data && - cp->dest->svc->pe->show_pe_data) { + if (cp->pe_data) { pe_data[0] = ' '; - len = strlen(cp->dest->svc->pe->name); - memcpy(pe_data + 1, cp->dest->svc->pe->name, len); + len = strlen(cp->pe->name); + memcpy(pe_data + 1, cp->pe->name, len); pe_data[len + 1] = ' '; len += 2; - len += cp->dest->svc->pe->show_pe_data(cp, - pe_data + len); + len += cp->pe->show_pe_data(cp, pe_data + len); } pe_data[len] = '\0'; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c 
b/net/netfilter/ipvs/ip_vs_ctl.c index 5f5daa3..3e92558 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1139,7 +1139,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, } if (u->pe_name && *u->pe_name) { - pe = ip_vs_pe_get(u->pe_name); + pe = ip_vs_pe_getbyname(u->pe_name); if (pe == NULL) { pr_info("persistence engine module ip_vs_pe_%s " "not found\n", u->pe_name); @@ -1250,7 +1250,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) old_sched = sched; if (u->pe_name && *u->pe_name) { - pe = ip_vs_pe_get(u->pe_name); + pe = ip_vs_pe_getbyname(u->pe_name); if (pe == NULL) { pr_info("persistence engine module ip_vs_pe_%s " "not found\n", u->pe_name); diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c index 3414af7..e99f920 100644 --- a/net/netfilter/ipvs/ip_vs_pe.c +++ b/net/netfilter/ipvs/ip_vs_pe.c @@ -30,7 +30,7 @@ void ip_vs_unbind_pe(struct ip_vs_service *svc) /* Get pe in the pe list by name */ static struct ip_vs_pe * -ip_vs_pe_getbyname(const char *pe_name) +__ip_vs_pe_getbyname(const char *pe_name) { struct ip_vs_pe *pe; @@ -60,28 +60,22 @@ ip_vs_pe_getbyname(const char *pe_name) } /* Lookup pe and try to load it if it doesn't exist */ -struct ip_vs_pe *ip_vs_pe_get(const char *name) +struct ip_vs_pe *ip_vs_pe_getbyname(const char *name) { struct ip_vs_pe *pe; /* Search for the pe by name */ - pe = ip_vs_pe_getbyname(name); + pe = __ip_vs_pe_getbyname(name); /* If pe not found, load the module and search again */ if (!pe) { request_module("ip_vs_pe_%s", name); - pe = ip_vs_pe_getbyname(name); + pe = __ip_vs_pe_getbyname(name); } return pe; } -void ip_vs_pe_put(struct ip_vs_pe *pe) -{ - if (pe && pe->module) - module_put(pe->module); -} - /* Register a pe in the pe list */ int register_ip_vs_pe(struct ip_vs_pe *pe) { -- cgit v1.1 From ea2c73afc23db3084fd857b027446c38fc7ff2c9 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 8 Nov 2010 20:06:30 +0900 
Subject: IPVS: Only match pe_data created by the same pe Only match persistence engine data if it was created by the same persistence engine. Reported-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_conn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 64a9ca3..261db1a 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -354,7 +354,7 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (p->pe_data && p->pe->ct_match) { - if (p->pe->ct_match(p, cp)) + if (p->pe == cp->pe && p->pe->ct_match(p, cp)) goto out; continue; } -- cgit v1.1 From d494262b8a0f3507b62104a565849124abe29827 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 9 Nov 2010 09:33:15 +0900 Subject: IPVS: Make the cp argument to ip_vs_sync_conn() static Acked-by: Hans Schillstrom Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index ab85aed..a4dccbc 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -236,7 +236,7 @@ get_curr_sync_buff(unsigned long time) * Add an ip_vs_conn information into the current sync_buff. * Called by ip_vs_in. 
*/ -void ip_vs_sync_conn(struct ip_vs_conn *cp) +void ip_vs_sync_conn(const struct ip_vs_conn *cp) { struct ip_vs_sync_mesg *m; struct ip_vs_sync_conn *s; -- cgit v1.1 From 7ae246a15a5c9d26cfb572d36794325db0400b18 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 9 Nov 2010 09:33:25 +0900 Subject: IPVS: Remove useless { } block from ip_vs_process_message() Acked-by: Hans Schillstrom Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_sync.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index a4dccbc..72b3d88 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -381,20 +381,18 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) } } - { - if (ip_vs_conn_fill_param_sync(AF_INET, s->protocol, - (union nf_inet_addr *)&s->caddr, - s->cport, - (union nf_inet_addr *)&s->vaddr, - s->vport, &param)) { - pr_err("ip_vs_conn_fill_param_sync failed"); - return; - } - if (!(flags & IP_VS_CONN_F_TEMPLATE)) - cp = ip_vs_conn_in_get(&param); - else - cp = ip_vs_ct_in_get(&param); + if (ip_vs_conn_fill_param_sync(AF_INET, s->protocol, + (union nf_inet_addr *)&s->caddr, + s->cport, + (union nf_inet_addr *)&s->vaddr, + s->vport, &param)) { + pr_err("ip_vs_conn_fill_param_sync failed"); + return; } + if (!(flags & IP_VS_CONN_F_TEMPLATE)) + cp = ip_vs_conn_in_get(&param); + else + cp = ip_vs_ct_in_get(&param); if (!cp) { /* * Find the appropriate destination for the connection. -- cgit v1.1 From 8aadf93c9c1ff1a53aafd18d038be0d709b5ebc0 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 9 Nov 2010 09:33:28 +0900 Subject: IPVS: buffer argument to ip_vs_process_message() should not be const It is assigned to a non-const variable and its contents are modified.
Acked-by: Hans Schillstrom Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 72b3d88..3897d6b 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -303,7 +303,7 @@ ip_vs_conn_fill_param_sync(int af, int protocol, * Process received multicast message and create the corresponding * ip_vs_conn entries. */ -static void ip_vs_process_message(const char *buffer, const size_t buflen) +static void ip_vs_process_message(char *buffer, const size_t buflen) { struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer; struct ip_vs_sync_conn *s; -- cgit v1.1 From 4ecd29447e6b9c12190e21c3e44ed5b12693c467 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2010 18:38:52 +0100 Subject: ipvs: add static and read_mostly attributes ip_vs_conn_tab_bits & ip_vs_conn_tab_mask are static to ipvs/ip_vs_conn.c ip_vs_conn_tab_size, ip_vs_conn_tab_mask, ip_vs_conn_tab [the pointer], ip_vs_conn_rnd are mostly read. Signed-off-by: Eric Dumazet Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_conn.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 261db1a..7615f9e 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -48,18 +48,18 @@ /* * Connection hash size. Default is what was selected at compile time. 
*/ -int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS; +static int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS; module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444); MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size"); /* size and mask values */ -int ip_vs_conn_tab_size; -int ip_vs_conn_tab_mask; +int ip_vs_conn_tab_size __read_mostly; +static int ip_vs_conn_tab_mask __read_mostly; /* * Connection hash table: for input and output packets lookups of IPVS */ -static struct list_head *ip_vs_conn_tab; +static struct list_head *ip_vs_conn_tab __read_mostly; /* SLAB cache for IPVS connections */ static struct kmem_cache *ip_vs_conn_cachep __read_mostly; @@ -71,7 +71,7 @@ static atomic_t ip_vs_conn_count = ATOMIC_INIT(0); static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0); /* random value for IPVS connection hash */ -static unsigned int ip_vs_conn_rnd; +static unsigned int ip_vs_conn_rnd __read_mostly; /* * Fine locking granularity for big connection hash table -- cgit v1.1 From a333e2ec05791bb866086274ac9749315900a0a6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2010 19:46:33 +0100 Subject: ipvs: remove shadow rt variable Remove a sparse warning about rt variable. 
Signed-off-by: Eric Dumazet Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_xmit.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 10bd39c..50b131c 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -188,7 +188,6 @@ __ip_vs_reroute_locally(struct sk_buff *skb) }, .mark = skb->mark, }; - struct rtable *rt; if (ip_route_output_key(net, &rt, &fl)) return 0; -- cgit v1.1 From 8f1b03a4c18e8f3f0801447b62330faa8ed3bb37 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 9 Nov 2010 10:08:49 +0900 Subject: ipvs: allow transmit of GRO aggregated skbs Attempt at allowing LVS to transmit skbs of greater than MTU length that have been aggregated by GRO and can thus be deaggregated by GSO. Cc: Julian Anastasov Cc: Herbert Xu Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_xmit.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 50b131c..fb2a445 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -407,7 +407,8 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* MTU checking */ mtu = dst_mtu(&rt->dst); - if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { + if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && + !skb_is_gso(skb)) { ip_rt_put(rt); icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG_RL("%s(): frag needed\n", __func__); @@ -460,7 +461,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* MTU checking */ mtu = dst_mtu(&rt->dst); - if (skb->len > mtu) { + if (skb->len > mtu && !skb_is_gso(skb)) { if (!skb->dev) { struct net *net = dev_net(skb_dst(skb)->dev); @@ -560,7 +561,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* MTU checking */ mtu = dst_mtu(&rt->dst); - if 
((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { + if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && + !skb_is_gso(skb)) { icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for"); @@ -675,7 +677,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* MTU checking */ mtu = dst_mtu(&rt->dst); - if (skb->len > mtu) { + if (skb->len > mtu && !skb_is_gso(skb)) { if (!skb->dev) { struct net *net = dev_net(skb_dst(skb)->dev); @@ -790,8 +792,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, df |= (old_iph->frag_off & htons(IP_DF)); - if ((old_iph->frag_off & htons(IP_DF)) - && mtu < ntohs(old_iph->tot_len)) { + if ((old_iph->frag_off & htons(IP_DF) && + mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) { icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error_put; @@ -903,7 +905,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (skb_dst(skb)) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); - if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { + if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) && + !skb_is_gso(skb)) { if (!skb->dev) { struct net *net = dev_net(skb_dst(skb)->dev); @@ -1008,7 +1011,8 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* MTU checking */ mtu = dst_mtu(&rt->dst); - if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) { + if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu && + !skb_is_gso(skb)) { icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); ip_rt_put(rt); IP_VS_DBG_RL("%s(): frag needed\n", __func__); @@ -1175,7 +1179,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* MTU checking */ mtu = dst_mtu(&rt->dst); - if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { + if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) && + 
!skb_is_gso(skb)) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error_put; @@ -1289,7 +1294,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* MTU checking */ mtu = dst_mtu(&rt->dst); - if (skb->len > mtu) { + if (skb->len > mtu && !skb_is_gso(skb)) { if (!skb->dev) { struct net *net = dev_net(skb_dst(skb)->dev); -- cgit v1.1 From 3bfd45f93c8bca7a5dc955235ff083602d95aa43 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 Nov 2010 10:19:18 +0100 Subject: netfilter: nf_conntrack: one less atomic op in nf_ct_expect_insert() Instead of doing atomic_inc(&exp->use) twice, call atomic_add(2, &exp->use); Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_expect.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index bbb2140..774f32b 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -323,7 +323,8 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) const struct nf_conntrack_expect_policy *p; unsigned int h = nf_ct_expect_dst_hash(&exp->tuple); - atomic_inc(&exp->use); + /* two references : one for hash insert, one for the timer */ + atomic_add(2, &exp->use); if (master_help) { hlist_add_head(&exp->lnode, &master_help->expectations); @@ -345,7 +346,6 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) } add_timer(&exp->timeout); - atomic_inc(&exp->use); NF_CT_STAT_INC(net, expect_create); } -- cgit v1.1 From 0e051e683ba4acb4e67c272c6a89707d974099d1 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Fri, 19 Nov 2010 14:25:07 +0100 Subject: IPVS: Backup, Prepare for transferring firewall marks (fwmark) to the backup daemon. 
One struct will have fwmark added: * ip_vs_conn ip_vs_conn_new() and ip_vs_find_dest() will have an extra param - fwmark The effects of that are in this patch. Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_conn.c | 5 +++-- net/netfilter/ipvs/ip_vs_core.c | 8 ++++---- net/netfilter/ipvs/ip_vs_ctl.c | 4 ++-- net/netfilter/ipvs/ip_vs_ftp.c | 5 +++-- net/netfilter/ipvs/ip_vs_sync.c | 4 ++-- 5 files changed, 14 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 7615f9e..66e4662 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -613,7 +613,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) if ((cp) && (!cp->dest)) { dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport, &cp->vaddr, cp->vport, - cp->protocol); + cp->protocol, cp->fwmark); ip_vs_bind_dest(cp, dest); return dest; } else @@ -803,7 +803,7 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp) struct ip_vs_conn * ip_vs_conn_new(const struct ip_vs_conn_param *p, const union nf_inet_addr *daddr, __be16 dport, unsigned flags, - struct ip_vs_dest *dest) + struct ip_vs_dest *dest, __u32 fwmark) { struct ip_vs_conn *cp; struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol); @@ -827,6 +827,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, &cp->daddr, daddr); cp->dport = dport; cp->flags = flags; + cp->fwmark = fwmark; if (flags & IP_VS_CONN_F_TEMPLATE && p->pe) { ip_vs_pe_get(p->pe); cp->pe = p->pe; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index b4e51e9..e2bb3cd 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -293,7 +293,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * and thus param.pe_data will be destroyed * when the template expires */ ct = ip_vs_conn_new(&param, &dest->addr, dport, -
IP_VS_CONN_F_TEMPLATE, dest, skb->mark); if (ct == NULL) { kfree(param.pe_data); return NULL; @@ -319,7 +319,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, */ ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0], &iph.daddr, ports[1], &param); - cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest); + cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark); if (cp == NULL) { ip_vs_conn_put(ct); return NULL; @@ -423,7 +423,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, pptr[0], &iph.daddr, pptr[1], &p); cp = ip_vs_conn_new(&p, &dest->addr, dest->port ? dest->port : pptr[1], - flags, dest); + flags, dest, skb->mark); if (!cp) return NULL; } @@ -489,7 +489,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, &iph.daddr, pptr[1], &p); cp = ip_vs_conn_new(&p, &daddr, 0, IP_VS_CONN_F_BYPASS | flags, - NULL); + NULL, skb->mark); if (!cp) return NF_DROP; } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 3e92558..a5bd002 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -657,12 +657,12 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport, const union nf_inet_addr *vaddr, - __be16 vport, __u16 protocol) + __be16 vport, __u16 protocol, __u32 fwmark) { struct ip_vs_dest *dest; struct ip_vs_service *svc; - svc = ip_vs_service_get(af, 0, protocol, vaddr, vport); + svc = ip_vs_service_get(af, fwmark, protocol, vaddr, vport); if (!svc) return NULL; dest = ip_vs_lookup_dest(svc, daddr, dport); diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 7545500..84aef65 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -208,7 +208,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, n_cp = ip_vs_conn_new(&p, &from, port, IP_VS_CONN_F_NO_CPORT |
IP_VS_CONN_F_NFCT, - cp->dest); + cp->dest, skb->mark); if (!n_cp) return 0; @@ -365,7 +365,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, if (!n_cp) { n_cp = ip_vs_conn_new(&p, &cp->daddr, htons(ntohs(cp->dport)-1), - IP_VS_CONN_F_NFCT, cp->dest); + IP_VS_CONN_F_NFCT, cp->dest, + skb->mark); if (!n_cp) return 0; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 3897d6b..47eed67 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -404,7 +404,7 @@ static void ip_vs_process_message(char *buffer, const size_t buflen) s->dport, (union nf_inet_addr *)&s->vaddr, s->vport, - s->protocol); + s->protocol, 0); /* Set the approprite ativity flag */ if (s->protocol == IPPROTO_TCP) { if (state != IP_VS_TCP_S_ESTABLISHED) @@ -419,7 +419,7 @@ static void ip_vs_process_message(char *buffer, const size_t buflen) } cp = ip_vs_conn_new(&param, (union nf_inet_addr *)&s->daddr, - s->dport, flags, dest); + s->dport, flags, dest, 0); if (dest) atomic_dec(&dest->refcnt); if (!cp) { -- cgit v1.1 From ce144f249f3f21a095a093d5d1ebd845177858da Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Fri, 19 Nov 2010 14:25:08 +0100 Subject: IPVS: Split ports[2] into src_port and dst_port Avoid sending invalid pointer due to skb_linearize() call. This patch prepares for next patch where skb_linearize is a part. In ip_vs_sched_persist() params the ports ptr will be replaced by src and dst port.
Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index e2bb3cd..9acdd79 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -200,7 +200,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, static struct ip_vs_conn * ip_vs_sched_persist(struct ip_vs_service *svc, struct sk_buff *skb, - __be16 ports[2]) + __be16 src_port, __be16 dst_port) { struct ip_vs_conn *cp = NULL; struct ip_vs_iphdr iph; @@ -224,8 +224,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc, IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " "mnet %s\n", - IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]), - IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]), + IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port), + IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port), IP_VS_DBG_ADDR(svc->af, &snet)); /* @@ -247,14 +247,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc, const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; __be16 vport = 0; - if (ports[1] == svc->port) { + if (dst_port == svc->port) { /* non-FTP template: * * FTP template: * */ if (svc->port != FTPPORT) - vport = ports[1]; + vport = dst_port; } else { /* Note: persistent fwmark-based services and * persistent port zero service are handled here. 
@@ -285,7 +285,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, return NULL; } - if (ports[1] == svc->port && svc->port != FTPPORT) + if (dst_port == svc->port && svc->port != FTPPORT) dport = dest->port; /* Create a template @@ -306,7 +306,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, kfree(param.pe_data); } - dport = ports[1]; + dport = dst_port; if (dport == svc->port && dest->port) dport = dest->port; @@ -317,8 +317,9 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* * Create a new connection according to the template */ - ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0], - &iph.daddr, ports[1], &param); + ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, src_port, + &iph.daddr, dst_port, &param); + cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark); if (cp == NULL) { ip_vs_conn_put(ct); @@ -388,7 +389,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, */ if (svc->flags & IP_VS_SVC_F_PERSISTENT) { *ignored = 0; - return ip_vs_sched_persist(svc, skb, pptr); + return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1]); } /* -- cgit v1.1 From 3716522653a79b724b02ee911f1b60c41932f847 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Fri, 19 Nov 2010 14:25:09 +0100 Subject: IPVS: skb defrag in L7 helpers L7 helpers like sip needs skb defrag since L7 data can be fragmented.
This patch requires "IPVS Break ports-2 into src_port and dst_port" patch Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_pe_sip.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index b8b4e96..0d83bc0 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -71,6 +71,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) struct ip_vs_iphdr iph; unsigned int dataoff, datalen, matchoff, matchlen; const char *dptr; + int retc; ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph); @@ -83,6 +84,8 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) if (dataoff >= skb->len) return -EINVAL; + if ((retc=skb_linearize(skb)) < 0) + return retc; dptr = skb->data + dataoff; datalen = skb->len - dataoff; -- cgit v1.1 From a5959d53d6048a56103ee0ade1eb6f2c0c733b1d Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Fri, 19 Nov 2010 14:25:10 +0100 Subject: IPVS: Handle Scheduling errors. If ip_vs_conn_fill_param_persist return an error to ip_vs_sched_persist, this error must propagate as ignored=-1 to ip_vs_schedule(). Errors from ip_vs_conn_new() in ip_vs_sched_persist() and ip_vs_schedule() should also return *ignored=-1; This patch just relies on the fact that ignored is 1 before calling ip_vs_sched_persist(). Sent from Julian: "The new case when ip_vs_conn_fill_param_persist fails should set *ignored = -1, so that we can use NF_DROP, see below. *ignored = -1 should be also used for ip_vs_conn_new failure in ip_vs_sched_persist() and ip_vs_schedule(). The new negative value should be handled in tcp,udp,sctp" "To summarize: - *ignored = 1: protocol tried to schedule (eg. on SYN), found svc but the svc/scheduler decides that this packet should be accepted with NF_ACCEPT because it must not be scheduled. 
- *ignored = 0: scheduler can not find destination, so try bypass or return ICMP and then NF_DROP (ip_vs_leave). - *ignored = -1: scheduler tried to schedule but fatal error occurred, eg. ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param failure such as missing Call-ID, ENOMEM on skb_linearize or pe_data. In this case we should return NF_DROP without any attempts to send ICMP with ip_vs_leave." More or less all ideas and input to this patch is work from Julian Anastasov Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 56 +++++++++++++++++++++++++---------- net/netfilter/ipvs/ip_vs_proto_sctp.c | 11 +++++-- net/netfilter/ipvs/ip_vs_proto_tcp.c | 10 +++++-- net/netfilter/ipvs/ip_vs_proto_udp.c | 10 +++++-- 4 files changed, 64 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 9acdd79..3445da6 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -177,7 +177,7 @@ ip_vs_set_state(struct ip_vs_conn *cp, int direction, return pp->state_transition(cp, direction, skb, pp); } -static inline void +static inline int ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, struct sk_buff *skb, int protocol, const union nf_inet_addr *caddr, __be16 cport, @@ -187,7 +187,9 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p); p->pe = svc->pe; if (p->pe && p->pe->fill_param) - p->pe->fill_param(p, skb); + return p->pe->fill_param(p, skb); + + return 0; } /* @@ -200,7 +202,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, static struct ip_vs_conn * ip_vs_sched_persist(struct ip_vs_service *svc, struct sk_buff *skb, - __be16 src_port, __be16 dst_port) + __be16 src_port, __be16 dst_port, int *ignored) { struct ip_vs_conn *cp = NULL; struct ip_vs_iphdr iph; @@ -268,20 +270,27 @@ 
ip_vs_sched_persist(struct ip_vs_service *svc, vaddr = &fwmark; } } - ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0, - vaddr, vport, &param); + /* return *ignored = -1 so NF_DROP can be used */ + if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0, + vaddr, vport, &param) < 0) { + *ignored = -1; + return NULL; + } } /* Check if a template already exists */ ct = ip_vs_ct_in_get(&param); if (!ct || !ip_vs_check_template(ct)) { - /* No template found or the dest of the connection + /* + * No template found or the dest of the connection * template is not available. + * return *ignored=0 i.e. ICMP and NF_DROP */ dest = svc->scheduler->schedule(svc, skb); if (!dest) { IP_VS_DBG(1, "p-schedule: no dest found.\n"); kfree(param.pe_data); + *ignored = 0; return NULL; } @@ -296,6 +305,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, IP_VS_CONN_F_TEMPLATE, dest, skb->mark); if (ct == NULL) { kfree(param.pe_data); + *ignored = -1; return NULL; } @@ -323,6 +333,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark); if (cp == NULL) { ip_vs_conn_put(ct); + *ignored = -1; return NULL; } @@ -342,6 +353,21 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * It selects a server according to the virtual service, and * creates a connection entry. * Protocols supported: TCP, UDP + * + * Usage of *ignored + * + * 1 : protocol tried to schedule (eg. on SYN), found svc but the + * svc/scheduler decides that this packet should be accepted with + * NF_ACCEPT because it must not be scheduled. + * + * 0 : scheduler can not find destination, so try bypass or + * return ICMP and then NF_DROP (ip_vs_leave). + * + * -1 : scheduler tried to schedule but fatal error occurred, eg. + * ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param + * failure such as missing Call-ID, ENOMEM on skb_linearize + * or pe_data. In this case we should return NF_DROP without + * any attempts to send ICMP with ip_vs_leave.
*/ struct ip_vs_conn * ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, @@ -372,11 +398,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, } /* - * Do not schedule replies from local real server. It is risky - * for fwmark services but mostly for persistent services. + * Do not schedule replies from local real server. */ if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && - (svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) && (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) { IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, "Not scheduling reply for existing connection"); @@ -387,10 +411,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, /* * Persistent service */ - if (svc->flags & IP_VS_SVC_F_PERSISTENT) { - *ignored = 0; - return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1]); - } + if (svc->flags & IP_VS_SVC_F_PERSISTENT) + return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored); + + *ignored = 0; /* * Non-persistent service @@ -403,8 +427,6 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, return NULL; } - *ignored = 0; - dest = svc->scheduler->schedule(svc, skb); if (dest == NULL) { IP_VS_DBG(1, "Schedule: no dest found.\n"); @@ -425,8 +447,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, cp = ip_vs_conn_new(&p, &dest->addr, dest->port ? dest->port : pptr[1], flags, dest, skb->mark); - if (!cp) + if (!cp) { + *ignored = -1; return NULL; + } } IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u " diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 1ea96bc..a315159 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -47,13 +47,18 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, * incoming connection, and create a connection entry. 
*/ *cpp = ip_vs_schedule(svc, skb, pp, &ignored); - if (!*cpp && !ignored) { - *verdict = ip_vs_leave(svc, skb, pp); + if (!*cpp && ignored <= 0) { + if (!ignored) + *verdict = ip_vs_leave(svc, skb, pp); + else { + ip_vs_service_put(svc); + *verdict = NF_DROP; + } return 0; } ip_vs_service_put(svc); } - + /* NF_ACCEPT */ return 1; } diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index f6c5200..1cdab12 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -64,12 +64,18 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, * incoming connection, and create a connection entry. */ *cpp = ip_vs_schedule(svc, skb, pp, &ignored); - if (!*cpp && !ignored) { - *verdict = ip_vs_leave(svc, skb, pp); + if (!*cpp && ignored <= 0) { + if (!ignored) + *verdict = ip_vs_leave(svc, skb, pp); + else { + ip_vs_service_put(svc); + *verdict = NF_DROP; + } return 0; } ip_vs_service_put(svc); } + /* NF_ACCEPT */ return 1; } diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 9d106a0..cd398de 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -63,12 +63,18 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, * incoming connection, and create a connection entry. */ *cpp = ip_vs_schedule(svc, skb, pp, &ignored); - if (!*cpp && !ignored) { - *verdict = ip_vs_leave(svc, skb, pp); + if (!*cpp && ignored <= 0) { + if (!ignored) + *verdict = ip_vs_leave(svc, skb, pp); + else { + ip_vs_service_put(svc); + *verdict = NF_DROP; + } return 0; } ip_vs_service_put(svc); } + /* NF_ACCEPT */ return 1; } -- cgit v1.1 From 2981bc9a63456500037ca1f434b93a561e63f384 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Fri, 19 Nov 2010 14:25:11 +0100 Subject: IPVS: Backup, Adding structs for new sync format New structs defined for version 1 of sync. 
* ip_vs_sync_v4 Ipv4 base format struct * ip_vs_sync_v6 Ipv6 base format struct Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_sync.c | 154 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 142 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 47eed67..566482f 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -43,11 +43,13 @@ #define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */ #define IP_VS_SYNC_PORT 8848 /* multicast port */ +#define SYNC_PROTO_VER 1 /* Protocol version in header */ /* * IPVS sync connection entry + * Version 0, i.e. original version. */ -struct ip_vs_sync_conn { +struct ip_vs_sync_conn_v0 { __u8 reserved; /* Protocol, addresses and port numbers */ @@ -71,40 +73,157 @@ struct ip_vs_sync_conn_options { struct ip_vs_seq out_seq; /* outgoing seq. struct */ }; +/* + Sync Connection format (sync_conn) + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Protocol | Ver. | Size | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Flags | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | State | cport | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | vport | dport | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | fwmark | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | timeout (in sec.) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | ... | + | IP-Addresses (v4 or v6) | + | ... | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + Optional Parameters. + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Param. Type | Param. Length | Param. 
data | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | + | ... | + | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | Param Type | Param. Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Param data | + | Last Param data should be padded for 32 bit alignment | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +*/ + +/* + * Type 0, IPv4 sync connection format + */ +struct ip_vs_sync_v4 { + __u8 type; + __u8 protocol; /* Which protocol (TCP/UDP) */ + __be16 ver_size; /* Version msb 4 bits */ + /* Flags and state transition */ + __be32 flags; /* status flags */ + __be16 state; /* state info */ + /* Protocol, addresses and port numbers */ + __be16 cport; + __be16 vport; + __be16 dport; + __be32 fwmark; /* Firewall mark from skb */ + __be32 timeout; /* cp timeout */ + __be32 caddr; /* client address */ + __be32 vaddr; /* virtual address */ + __be32 daddr; /* destination address */ + /* The sequence options start here */ + /* PE data padded to 32bit alignment after seq. options */ +}; +/* + * Type 2 messages IPv6 + */ +struct ip_vs_sync_v6 { + __u8 type; + __u8 protocol; /* Which protocol (TCP/UDP) */ + __be16 ver_size; /* Version msb 4 bits */ + /* Flags and state transition */ + __be32 flags; /* status flags */ + __be16 state; /* state info */ + /* Protocol, addresses and port numbers */ + __be16 cport; + __be16 vport; + __be16 dport; + __be32 fwmark; /* Firewall mark from skb */ + __be32 timeout; /* cp timeout */ + struct in6_addr caddr; /* client address */ + struct in6_addr vaddr; /* virtual address */ + struct in6_addr daddr; /* destination address */ + /* The sequence options start here */ + /* PE data padded to 32bit alignment after seq. 
options */ +}; + +union ip_vs_sync_conn { + struct ip_vs_sync_v4 v4; + struct ip_vs_sync_v6 v6; +}; + +/* Bits in Type field in above */ +#define STYPE_INET6 0 +#define STYPE_F_INET6 (1 << STYPE_INET6) + +#define SVER_SHIFT 12 /* Shift to get version */ +#define SVER_MASK 0x0fff /* Mask to strip version */ + +#define IPVS_OPT_SEQ_DATA 1 +#define IPVS_OPT_PE_DATA 2 +#define IPVS_OPT_PE_NAME 3 +#define IPVS_OPT_PARAM 7 + +#define IPVS_OPT_F_SEQ_DATA (1 << (IPVS_OPT_SEQ_DATA-1)) +#define IPVS_OPT_F_PE_DATA (1 << (IPVS_OPT_PE_DATA-1)) +#define IPVS_OPT_F_PE_NAME (1 << (IPVS_OPT_PE_NAME-1)) +#define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1)) + struct ip_vs_sync_thread_data { struct socket *sock; char *buf; }; -#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn)) +/* Version 0 definition of packet sizes */ +#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn_v0)) #define FULL_CONN_SIZE \ -(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options)) +(sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options)) /* - The master mulitcasts messages to the backup load balancers in the - following format. + The master mulitcasts messages (Datagrams) to the backup load balancers + in the following format. + + Version 1: + Note, first byte should be Zero, so ver 0 receivers will drop the packet. 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Count Conns | SyncID | Size | + | 0 | SyncID | Size | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Count Conns | Version | Reserved, set to Zero | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | | IPVS Sync Connection (1) | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | . | - | . | + ~ . ~ | . 
| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | | | IPVS Sync Connection (n) | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Version 0 Header + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Count Conns | SyncID | Size | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | IPVS Sync Connection (1) | */ #define SYNC_MESG_HEADER_LEN 4 #define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */ +/* Version 0 header */ struct ip_vs_sync_mesg { __u8 nr_conns; __u8 syncid; @@ -113,6 +232,17 @@ struct ip_vs_sync_mesg { /* ip_vs_sync_conn entries start here */ }; +/* Version 1 header */ +struct ip_vs_sync_mesg_v2 { + __u8 reserved; /* must be zero */ + __u8 syncid; + __u16 size; + __u8 nr_conns; + __s8 version; /* SYNC_PROTO_VER */ + __u16 spare; + /* ip_vs_sync_conn entries start here */ +}; + /* the maximum length of sync (sending/receiving) message */ static int sync_send_mesg_maxlen; static int sync_recv_mesg_maxlen; @@ -239,7 +369,7 @@ get_curr_sync_buff(unsigned long time) void ip_vs_sync_conn(const struct ip_vs_conn *cp) { struct ip_vs_sync_mesg *m; - struct ip_vs_sync_conn *s; + struct ip_vs_sync_conn_v0 *s; int len; spin_lock(&curr_sb_lock); @@ -254,7 +384,7 @@ void ip_vs_sync_conn(const struct ip_vs_conn *cp) len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? 
FULL_CONN_SIZE : SIMPLE_CONN_SIZE; m = curr_sb->mesg; - s = (struct ip_vs_sync_conn *)curr_sb->head; + s = (struct ip_vs_sync_conn_v0 *)curr_sb->head; /* copy members */ s->protocol = cp->protocol; @@ -306,7 +436,7 @@ ip_vs_conn_fill_param_sync(int af, int protocol, static void ip_vs_process_message(char *buffer, const size_t buflen) { struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer; - struct ip_vs_sync_conn *s; + struct ip_vs_sync_conn_v0 *s; struct ip_vs_sync_conn_options *opt; struct ip_vs_conn *cp; struct ip_vs_protocol *pp; @@ -343,7 +473,7 @@ static void ip_vs_process_message(char *buffer, const size_t buflen) IP_VS_ERR_RL("bogus conn in sync message\n"); return; } - s = (struct ip_vs_sync_conn *) p; + s = (struct ip_vs_sync_conn_v0 *) p; flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC; flags &= ~IP_VS_CONN_F_HASHED; if (flags & IP_VS_CONN_F_SEQ_MASK) { @@ -849,7 +979,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", - sizeof(struct ip_vs_sync_conn)); + sizeof(struct ip_vs_sync_conn_v0)); if (state == IP_VS_STATE_MASTER) { if (sync_master_thread) -- cgit v1.1 From fe5e7a1efb664df0280f10377813d7099fb7eb0f Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Fri, 19 Nov 2010 14:25:12 +0100 Subject: IPVS: Backup, Adding Version 1 receive capability Functionality improvements * flags changed from 16 to 32 bits * fwmark added (32 bits) * timeout in sec. added (32 bits) * pe data added (Variable length) * IPv6 capabilities (3x16 bytes for addr.) * Version and type in every conn msg. ip_vs_process_message() now handles Version 1 messages and will call ip_vs_process_message_v0() for version 0 messages. ip_vs_proc_conn() is common for both version, and handles the update of connection hash. 
ip_vs_conn_fill_param_sync() - Version 1 messages only ip_vs_conn_fill_param_sync_v0() - Version 0 messages only Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_pe.c | 5 +- net/netfilter/ipvs/ip_vs_sync.c | 549 +++++++++++++++++++++++++++++++--------- 2 files changed, 431 insertions(+), 123 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c index e99f920..5cf859c 100644 --- a/net/netfilter/ipvs/ip_vs_pe.c +++ b/net/netfilter/ipvs/ip_vs_pe.c @@ -29,12 +29,11 @@ void ip_vs_unbind_pe(struct ip_vs_service *svc) } /* Get pe in the pe list by name */ -static struct ip_vs_pe * -__ip_vs_pe_getbyname(const char *pe_name) +struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) { struct ip_vs_pe *pe; - IP_VS_DBG(2, "%s(): pe_name \"%s\"\n", __func__, + IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__, pe_name); spin_lock_bh(&ip_vs_pe_lock); diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 566482f..e071508 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -35,6 +35,8 @@ #include #include +#include /* Used for ntoh_seq and hton_seq */ + #include #include @@ -286,6 +288,16 @@ static struct sockaddr_in mcast_addr = { .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP), }; +/* + * Copy of struct ip_vs_seq + * From unaligned network order to aligned host order + */ +static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho) +{ + ho->init_seq = get_unaligned_be32(&no->init_seq); + ho->delta = get_unaligned_be32(&no->delta); + ho->previous_delta = get_unaligned_be32(&no->previous_delta); +} static inline struct ip_vs_sync_buff *sb_dequeue(void) { @@ -418,59 +430,186 @@ void ip_vs_sync_conn(const struct ip_vs_conn *cp) ip_vs_sync_conn(cp->control); } +/* + * fill_param used by version 1 + */ static inline int -ip_vs_conn_fill_param_sync(int af, int protocol, - const union nf_inet_addr 
*caddr, __be16 cport, - const union nf_inet_addr *vaddr, __be16 vport, - struct ip_vs_conn_param *p) +ip_vs_conn_fill_param_sync(int af, union ip_vs_sync_conn *sc, + struct ip_vs_conn_param *p, + __u8 *pe_data, unsigned int pe_data_len, + __u8 *pe_name, unsigned int pe_name_len) { - /* XXX: Need to take into account persistence engine */ - ip_vs_conn_fill_param(af, protocol, caddr, cport, vaddr, vport, p); +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + ip_vs_conn_fill_param(af, sc->v6.protocol, + (const union nf_inet_addr *)&sc->v6.caddr, + sc->v6.cport, + (const union nf_inet_addr *)&sc->v6.vaddr, + sc->v6.vport, p); + else +#endif + ip_vs_conn_fill_param(af, sc->v4.protocol, + (const union nf_inet_addr *)&sc->v4.caddr, + sc->v4.cport, + (const union nf_inet_addr *)&sc->v4.vaddr, + sc->v4.vport, p); + /* Handle pe data */ + if (pe_data_len) { + if (pe_name_len) { + char buff[IP_VS_PENAME_MAXLEN+1]; + + memcpy(buff, pe_name, pe_name_len); + buff[pe_name_len]=0; + p->pe = __ip_vs_pe_getbyname(buff); + if (!p->pe) { + IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n", buff); + return 1; + } + } else { + IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n"); + return 1; + } + + p->pe_data = kmalloc(pe_data_len, GFP_ATOMIC); + if (!p->pe_data) { + if (p->pe->module) + module_put(p->pe->module); + return -ENOMEM; + } + memcpy(p->pe_data, pe_data, pe_data_len); + p->pe_data_len = pe_data_len; + } return 0; } /* - * Process received multicast message and create the corresponding - * ip_vs_conn entries. + * Connection Add / Update. + * Common for version 0 and 1 reception of backup sync_conns. + * Param: ... + * timeout is in sec. 
+ */ +static void ip_vs_proc_conn(struct ip_vs_conn_param *param, unsigned flags, + unsigned state, unsigned protocol, unsigned type, + const union nf_inet_addr *daddr, __be16 dport, + unsigned long timeout, __u32 fwmark, + struct ip_vs_sync_conn_options *opt, + struct ip_vs_protocol *pp) +{ + struct ip_vs_dest *dest; + struct ip_vs_conn *cp; + + + if (!(flags & IP_VS_CONN_F_TEMPLATE)) + cp = ip_vs_conn_in_get(param); + else + cp = ip_vs_ct_in_get(param); + + if (cp && param->pe_data) /* Free pe_data */ + kfree(param->pe_data); + if (!cp) { + /* + * Find the appropriate destination for the connection. + * If it is not found the connection will remain unbound + * but still handled. + */ + dest = ip_vs_find_dest(type, daddr, dport, param->vaddr, + param->vport, protocol, fwmark); + + /* Set the approprite ativity flag */ + if (protocol == IPPROTO_TCP) { + if (state != IP_VS_TCP_S_ESTABLISHED) + flags |= IP_VS_CONN_F_INACTIVE; + else + flags &= ~IP_VS_CONN_F_INACTIVE; + } else if (protocol == IPPROTO_SCTP) { + if (state != IP_VS_SCTP_S_ESTABLISHED) + flags |= IP_VS_CONN_F_INACTIVE; + else + flags &= ~IP_VS_CONN_F_INACTIVE; + } + cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark); + if (dest) + atomic_dec(&dest->refcnt); + if (!cp) { + if (param->pe_data) + kfree(param->pe_data); + IP_VS_DBG(2, "BACKUP, add new conn. 
failed\n"); + return; + } + } else if (!cp->dest) { + dest = ip_vs_try_bind_dest(cp); + if (dest) + atomic_dec(&dest->refcnt); + } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) && + (cp->state != state)) { + /* update active/inactive flag for the connection */ + dest = cp->dest; + if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && + (state != IP_VS_TCP_S_ESTABLISHED)) { + atomic_dec(&dest->activeconns); + atomic_inc(&dest->inactconns); + cp->flags |= IP_VS_CONN_F_INACTIVE; + } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) && + (state == IP_VS_TCP_S_ESTABLISHED)) { + atomic_inc(&dest->activeconns); + atomic_dec(&dest->inactconns); + cp->flags &= ~IP_VS_CONN_F_INACTIVE; + } + } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) && + (cp->state != state)) { + dest = cp->dest; + if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && + (state != IP_VS_SCTP_S_ESTABLISHED)) { + atomic_dec(&dest->activeconns); + atomic_inc(&dest->inactconns); + cp->flags &= ~IP_VS_CONN_F_INACTIVE; + } + } + + if (opt) + memcpy(&cp->in_seq, opt, sizeof(*opt)); + atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]); + cp->state = state; + cp->old_state = cp->state; + /* + * For Ver 0 messages style + * - Not possible to recover the right timeout for templates + * - can not find the right fwmark + * virtual service. If needed, we can do it for + * non-fwmark persistent services. + * Ver 1 messages style. + * - No problem. 
+ */ + if (timeout) { + if (timeout > MAX_SCHEDULE_TIMEOUT / HZ) + timeout = MAX_SCHEDULE_TIMEOUT / HZ; + cp->timeout = timeout*HZ; + } else if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table) + cp->timeout = pp->timeout_table[state]; + else + cp->timeout = (3*60*HZ); + ip_vs_conn_put(cp); +} + +/* + * Process received multicast message for Version 0 */ -static void ip_vs_process_message(char *buffer, const size_t buflen) +static void ip_vs_process_message_v0(const char *buffer, const size_t buflen) { struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer; struct ip_vs_sync_conn_v0 *s; struct ip_vs_sync_conn_options *opt; - struct ip_vs_conn *cp; struct ip_vs_protocol *pp; - struct ip_vs_dest *dest; struct ip_vs_conn_param param; char *p; int i; - if (buflen < sizeof(struct ip_vs_sync_mesg)) { - IP_VS_ERR_RL("sync message header too short\n"); - return; - } - - /* Convert size back to host byte order */ - m->size = ntohs(m->size); - - if (buflen != m->size) { - IP_VS_ERR_RL("bogus sync message size\n"); - return; - } - - /* SyncID sanity check */ - if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) { - IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n", - m->syncid); - return; - } - p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); for (i=0; i<m->nr_conns; i++) { unsigned flags, state; if (p + SIMPLE_CONN_SIZE > buffer+buflen) { - IP_VS_ERR_RL("bogus conn in sync message\n"); + IP_VS_ERR_RL("BACKUP v0, bogus conn\n"); return; } s = (struct ip_vs_sync_conn_v0 *) p; @@ -480,7 +619,7 @@ static void ip_vs_process_message(char *buffer, const size_t buflen) opt = (struct ip_vs_sync_conn_options *)&s[1]; p += FULL_CONN_SIZE; if (p > buffer+buflen) { - IP_VS_ERR_RL("bogus conn options in sync message\n"); + IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n"); return; } } else { @@ -492,12 +631,12 @@ static void ip_vs_process_message(char *buffer, const size_t buflen) if (!(flags & IP_VS_CONN_F_TEMPLATE)) { pp = 
ip_vs_proto_get(s->protocol); if (!pp) { - IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n", + IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n", s->protocol); continue; } if (state >= pp->num_states) { - IP_VS_DBG(2, "Invalid %s state %u in sync msg\n", + IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n", pp->name, state); continue; } @@ -505,103 +644,273 @@ static void ip_vs_process_message(char *buffer, const size_t buflen) /* protocol in templates is not used for state/timeout */ pp = NULL; if (state > 0) { - IP_VS_DBG(2, "Invalid template state %u in sync msg\n", + IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n", state); state = 0; } } - if (ip_vs_conn_fill_param_sync(AF_INET, s->protocol, - (union nf_inet_addr *)&s->caddr, - s->cport, - (union nf_inet_addr *)&s->vaddr, - s->vport, &param)) { - pr_err("ip_vs_conn_fill_param_sync failed"); - return; + ip_vs_conn_fill_param(AF_INET, s->protocol, + (const union nf_inet_addr *)&s->caddr, + s->cport, + (const union nf_inet_addr *)&s->vaddr, + s->vport, &param); + + /* Send timeout as Zero */ + ip_vs_proc_conn(&param, flags, state, s->protocol, AF_INET, + (union nf_inet_addr *)&s->daddr, s->dport, + 0, 0, opt, pp); + } +} + +/* + * Handle options + */ +static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen, + __u32 *opt_flags, + struct ip_vs_sync_conn_options *opt) +{ + struct ip_vs_sync_conn_options *topt; + + topt = (struct ip_vs_sync_conn_options *)p; + + if (plen != sizeof(struct ip_vs_sync_conn_options)) { + IP_VS_DBG(2, "BACKUP, bogus conn options length\n"); + return -EINVAL; + } + if (*opt_flags & IPVS_OPT_F_SEQ_DATA) { + IP_VS_DBG(2, "BACKUP, conn options found twice\n"); + return -EINVAL; + } + ntoh_seq(&topt->in_seq, &opt->in_seq); + ntoh_seq(&topt->out_seq, &opt->out_seq); + *opt_flags |= IPVS_OPT_F_SEQ_DATA; + return 0; +} + +static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len, + __u8 **data, unsigned int maxlen, + __u32 *opt_flags, __u32 flag) +{ + if (plen > maxlen) { + 
IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen); + return -EINVAL; + } + if (*opt_flags & flag) { + IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag); + return -EINVAL; + } + *data_len = plen; + *data = p; + *opt_flags |= flag; + return 0; +} +/* + * Process a Version 1 sync. connection + */ +static inline int ip_vs_proc_sync_conn(__u8 *p, __u8 *msg_end) +{ + struct ip_vs_sync_conn_options opt; + union ip_vs_sync_conn *s; + struct ip_vs_protocol *pp; + struct ip_vs_conn_param param; + __u32 flags; + unsigned int af, state, pe_data_len=0, pe_name_len=0; + __u8 *pe_data=NULL, *pe_name=NULL; + __u32 opt_flags=0; + int retc=0; + + s = (union ip_vs_sync_conn *) p; + + if (s->v6.type & STYPE_F_INET6) { +#ifdef CONFIG_IP_VS_IPV6 + af = AF_INET6; + p += sizeof(struct ip_vs_sync_v6); +#else + IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n"); + retc = 10; + goto out; +#endif + } else if (!s->v4.type) { + af = AF_INET; + p += sizeof(struct ip_vs_sync_v4); + } else { + return -10; + } + if (p > msg_end) + return -20; + + /* Process optional params check Type & Len. */ + while (p < msg_end) { + int ptype; + int plen; + + if (p+2 > msg_end) + return -30; + ptype = *(p++); + plen = *(p++); + + if (!plen || ((p + plen) > msg_end)) + return -40; + /* Handle seq option p = param data */ + switch (ptype & ~IPVS_OPT_F_PARAM) { + case IPVS_OPT_SEQ_DATA: + if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt)) + return -50; + break; + + case IPVS_OPT_PE_DATA: + if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data, + IP_VS_PEDATA_MAXLEN, &opt_flags, + IPVS_OPT_F_PE_DATA)) + return -60; + break; + + case IPVS_OPT_PE_NAME: + if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name, + IP_VS_PENAME_MAXLEN, &opt_flags, + IPVS_OPT_F_PE_NAME)) + return -70; + break; + + default: + /* Param data mandatory ? 
*/ + if (!(ptype & IPVS_OPT_F_PARAM)) { + IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n", + ptype & ~IPVS_OPT_F_PARAM); + retc = 20; + goto out; + } } - if (!(flags & IP_VS_CONN_F_TEMPLATE)) - cp = ip_vs_conn_in_get(&param); - else - cp = ip_vs_ct_in_get(&param); - if (!cp) { - /* - * Find the appropriate destination for the connection. - * If it is not found the connection will remain unbound - * but still handled. - */ - dest = ip_vs_find_dest(AF_INET, - (union nf_inet_addr *)&s->daddr, - s->dport, - (union nf_inet_addr *)&s->vaddr, - s->vport, - s->protocol, 0); - /* Set the approprite ativity flag */ - if (s->protocol == IPPROTO_TCP) { - if (state != IP_VS_TCP_S_ESTABLISHED) - flags |= IP_VS_CONN_F_INACTIVE; - else - flags &= ~IP_VS_CONN_F_INACTIVE; - } else if (s->protocol == IPPROTO_SCTP) { - if (state != IP_VS_SCTP_S_ESTABLISHED) - flags |= IP_VS_CONN_F_INACTIVE; - else - flags &= ~IP_VS_CONN_F_INACTIVE; + p += plen; /* Next option */ + } + + /* Get flags and Mask off unsupported */ + flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK; + flags |= IP_VS_CONN_F_SYNC; + state = ntohs(s->v4.state); + + if (!(flags & IP_VS_CONN_F_TEMPLATE)) { + pp = ip_vs_proto_get(s->v4.protocol); + if (!pp) { + IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n", + s->v4.protocol); + retc = 30; + goto out; + } + if (state >= pp->num_states) { + IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n", + pp->name, state); + retc = 40; + goto out; + } + } else { + /* protocol in templates is not used for state/timeout */ + pp = NULL; + if (state > 0) { + IP_VS_DBG(3, "BACKUP, Invalid template state %u\n", + state); + state = 0; + } + } + if (ip_vs_conn_fill_param_sync(af, s, &param, + pe_data, pe_data_len, + pe_name, pe_name_len)) { + retc = 50; + goto out; + } + /* If only IPv4, just silent skip IPv6 */ + if (af == AF_INET) + ip_vs_proc_conn(&param, flags, state, s->v4.protocol, af, + (union nf_inet_addr *)&s->v4.daddr, s->v4.dport, + ntohl(s->v4.timeout), ntohl(s->v4.fwmark), + (opt_flags & 
IPVS_OPT_F_SEQ_DATA ? &opt : NULL), + pp); +#ifdef CONFIG_IP_VS_IPV6 + else + ip_vs_proc_conn(&param, flags, state, s->v6.protocol, af, + (union nf_inet_addr *)&s->v6.daddr, s->v6.dport, + ntohl(s->v6.timeout), ntohl(s->v6.fwmark), + (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL), + pp); +#endif + return 0; + /* Error exit */ +out: + IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc); + return retc; + +} +/* + * Process received multicast message and create the corresponding + * ip_vs_conn entries. + * Handles Version 0 & 1 + */ +static void ip_vs_process_message(__u8 *buffer, const size_t buflen) +{ + struct ip_vs_sync_mesg_v2 *m2 = (struct ip_vs_sync_mesg_v2 *)buffer; + __u8 *p, *msg_end; + unsigned int i, nr_conns; + + if (buflen < sizeof(struct ip_vs_sync_mesg)) { + IP_VS_DBG(2, "BACKUP, message header too short\n"); + return; + } + /* Convert size back to host byte order */ + m2->size = ntohs(m2->size); + + if (buflen != m2->size) { + IP_VS_DBG(2, "BACKUP, bogus message size\n"); + return; + } + /* SyncID sanity check */ + if (ip_vs_backup_syncid != 0 && m2->syncid != ip_vs_backup_syncid) { + IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid); + return; + } + /* Handle version 1 message */ + if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0) + && (m2->spare == 0)) { + + msg_end = buffer + sizeof(struct ip_vs_sync_mesg_v2); + nr_conns = m2->nr_conns; + + for (i=0; i<nr_conns; i++) { + union ip_vs_sync_conn *s; + unsigned size; + int retc; + + p = msg_end; + if (p + sizeof(s->v4) > buffer+buflen) { + IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n"); + return; } - cp = ip_vs_conn_new(&param, - (union nf_inet_addr *)&s->daddr, - s->dport, flags, dest, 0); - if (dest) - atomic_dec(&dest->refcnt); - if (!cp) { - pr_err("ip_vs_conn_new failed\n"); + s = (union ip_vs_sync_conn *)p; + size = ntohs(s->v4.ver_size) & SVER_MASK; + msg_end = p + size; + /* Basic sanity checks */ + if (msg_end > buffer+buflen) { + IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n"); return; } - } else if (!cp->dest) { - dest = ip_vs_try_bind_dest(cp); - if (dest) - 
atomic_dec(&dest->refcnt); - } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) && - (cp->state != state)) { - /* update active/inactive flag for the connection */ - dest = cp->dest; - if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && - (state != IP_VS_TCP_S_ESTABLISHED)) { - atomic_dec(&dest->activeconns); - atomic_inc(&dest->inactconns); - cp->flags |= IP_VS_CONN_F_INACTIVE; - } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) && - (state == IP_VS_TCP_S_ESTABLISHED)) { - atomic_inc(&dest->activeconns); - atomic_dec(&dest->inactconns); - cp->flags &= ~IP_VS_CONN_F_INACTIVE; + if (ntohs(s->v4.ver_size) >> SVER_SHIFT) { + IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n", + ntohs(s->v4.ver_size) >> SVER_SHIFT); + return; } - } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) && - (cp->state != state)) { - dest = cp->dest; - if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && - (state != IP_VS_SCTP_S_ESTABLISHED)) { - atomic_dec(&dest->activeconns); - atomic_inc(&dest->inactconns); - cp->flags &= ~IP_VS_CONN_F_INACTIVE; + /* Process a single sync_conn */ + if ((retc=ip_vs_proc_sync_conn(p, msg_end)) < 0) { + IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n", + retc); + return; } + /* Make sure we have 32 bit alignment */ + msg_end = p + ((size + 3) & ~3); } - - if (opt) - memcpy(&cp->in_seq, opt, sizeof(*opt)); - atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]); - cp->state = state; - cp->old_state = cp->state; - /* - * We can not recover the right timeout for templates - * in all cases, we can not find the right fwmark - * virtual service. If needed, we can do it for - * non-fwmark persistent services. 
- */ - if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table) - cp->timeout = pp->timeout_table[state]; - else - cp->timeout = (3*60*HZ); - ip_vs_conn_put(cp); + } else { + /* Old type of message */ + ip_vs_process_message_v0(buffer, buflen); + return; } } -- cgit v1.1 From 986a075795339c5ea1122ce9290dfd5504252eb0 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Fri, 19 Nov 2010 14:25:13 +0100 Subject: IPVS: Backup, Change sending to Version 1 format Enable sending and removal of version 0 sending Affected functions, ip_vs_sync_buff_create() ip_vs_sync_conn() ip_vs_core.c removal of IPv4 check. *v5 Just check cp->pe_data_len in ip_vs_sync_conn Check if padding needed before adding a new sync_conn to the buffer, i.e. avoid sending padding at the end. *v4 moved sanity check and pe_name_len after sloop. use cp->pe instead of cp->dest->svc->pe real length in each sync_conn, not padded length however total size of a sync_msg includes padding. *v3 Sending ip_vs_sync_conn_options in network order. Sending Templates for ONE_PACKET conn. Renaming of ip_vs_sync_mesg to ip_vs_sync_mesg_v0 Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 13 ++- net/netfilter/ipvs/ip_vs_sync.c | 189 +++++++++++++++++++++++++++++++--------- 2 files changed, 155 insertions(+), 47 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 3445da6..5287771 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1560,9 +1560,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) * * Sync connection if it is about to close to * encorage the standby servers to update the connections timeout + * + * For ONE_PKT let ip_vs_sync_conn() do the filter work. 
*/ - pkts = atomic_add_return(1, &cp->in_pkts); - if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) && + if (cp->flags & IP_VS_CONN_F_ONE_PACKET) + pkts = sysctl_ip_vs_sync_threshold[0]; + else + pkts = atomic_add_return(1, &cp->in_pkts); + + if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && cp->protocol == IPPROTO_SCTP) { if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && (pkts % sysctl_ip_vs_sync_threshold[1] @@ -1577,8 +1583,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } /* Keep this block last: TCP and others with pp->num_states <= 1 */ - else if (af == AF_INET && - (ip_vs_sync_state & IP_VS_STATE_MASTER) && + else if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && (((cp->protocol != IPPROTO_TCP || cp->state == IP_VS_TCP_S_ESTABLISHED) && (pkts % sysctl_ip_vs_sync_threshold[1] diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index e071508..df5abf0 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -226,7 +226,7 @@ struct ip_vs_sync_thread_data { #define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */ /* Version 0 header */ -struct ip_vs_sync_mesg { +struct ip_vs_sync_mesg_v0 { __u8 nr_conns; __u8 syncid; __u16 size; @@ -235,7 +235,7 @@ struct ip_vs_sync_mesg { }; /* Version 1 header */ -struct ip_vs_sync_mesg_v2 { +struct ip_vs_sync_mesg { __u8 reserved; /* must be zero */ __u8 syncid; __u16 size; @@ -299,6 +299,17 @@ static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho) ho->previous_delta = get_unaligned_be32(&no->previous_delta); } +/* + * Copy of struct ip_vs_seq + * From Aligned host order to unaligned network order + */ +static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no) +{ + put_unaligned_be32(ho->init_seq, &no->init_seq); + put_unaligned_be32(ho->delta, &no->delta); + put_unaligned_be32(ho->previous_delta, &no->previous_delta); +} + static inline struct ip_vs_sync_buff *sb_dequeue(void) { struct ip_vs_sync_buff *sb; @@ -317,6 
+328,9 @@ static inline struct ip_vs_sync_buff *sb_dequeue(void) return sb; } +/* + * Create a new sync buffer for Version 1 proto. + */ static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void) { struct ip_vs_sync_buff *sb; @@ -328,11 +342,15 @@ static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void) kfree(sb); return NULL; } - sb->mesg->nr_conns = 0; + sb->mesg->reserved = 0; /* old nr_conns i.e. must be zeo now */ + sb->mesg->version = SYNC_PROTO_VER; sb->mesg->syncid = ip_vs_master_syncid; - sb->mesg->size = 4; - sb->head = (unsigned char *)sb->mesg + 4; + sb->mesg->size = sizeof(struct ip_vs_sync_mesg); + sb->mesg->nr_conns = 0; + sb->mesg->spare = 0; + sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg); sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen; + sb->firstuse = jiffies; return sb; } @@ -373,18 +391,60 @@ get_curr_sync_buff(unsigned long time) return sb; } - /* * Add an ip_vs_conn information into the current sync_buff. * Called by ip_vs_in. 
+ * Sending Version 1 messages */ -void ip_vs_sync_conn(const struct ip_vs_conn *cp) +void ip_vs_sync_conn(struct ip_vs_conn *cp) { struct ip_vs_sync_mesg *m; - struct ip_vs_sync_conn_v0 *s; - int len; + union ip_vs_sync_conn *s; + __u8 *p; + unsigned int len, pe_name_len, pad; + + /* Do not sync ONE PACKET */ + if (cp->flags & IP_VS_CONN_F_ONE_PACKET) + goto control; +sloop: + /* Sanity checks */ + pe_name_len = 0; + if (cp->pe_data_len) { + if (!cp->pe_data || !cp->dest) { + IP_VS_ERR_RL("SYNC, connection pe_data invalid\n"); + return; + } + pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN); + } spin_lock(&curr_sb_lock); + +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + len = sizeof(struct ip_vs_sync_v6); + else +#endif + len = sizeof(struct ip_vs_sync_v4); + + if (cp->flags & IP_VS_CONN_F_SEQ_MASK) + len += sizeof(struct ip_vs_sync_conn_options) + 2; + + if (cp->pe_data_len) + len += cp->pe_data_len + 2; /* + Param hdr field */ + if (pe_name_len) + len += pe_name_len + 2; + + /* check if there is a space for this one */ + pad = 0; + if (curr_sb) { + pad = (4 - (size_t)curr_sb->head) & 3; + if (curr_sb->head + len + pad > curr_sb->end) { + sb_queue_tail(curr_sb); + curr_sb = NULL; + pad = 0; + } + } + if (!curr_sb) { if (!(curr_sb=ip_vs_sync_buff_create())) { spin_unlock(&curr_sb_lock); @@ -393,41 +453,84 @@ void ip_vs_sync_conn(const struct ip_vs_conn *cp) } } - len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? 
FULL_CONN_SIZE : - SIMPLE_CONN_SIZE; m = curr_sb->mesg; - s = (struct ip_vs_sync_conn_v0 *)curr_sb->head; - - /* copy members */ - s->protocol = cp->protocol; - s->cport = cp->cport; - s->vport = cp->vport; - s->dport = cp->dport; - s->caddr = cp->caddr.ip; - s->vaddr = cp->vaddr.ip; - s->daddr = cp->daddr.ip; - s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED); - s->state = htons(cp->state); - if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { - struct ip_vs_sync_conn_options *opt = - (struct ip_vs_sync_conn_options *)&s[1]; - memcpy(opt, &cp->in_seq, sizeof(*opt)); - } - + p = curr_sb->head; + curr_sb->head += pad + len; + m->size += pad + len; + /* Add ev. padding from prev. sync_conn */ + while (pad--) + *(p++) = 0; + + s = (union ip_vs_sync_conn *)p; + + /* Set message type & copy members */ + s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0); + s->v4.ver_size = htons(len & SVER_MASK); /* Version 0 */ + s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED); + s->v4.state = htons(cp->state); + s->v4.protocol = cp->protocol; + s->v4.cport = cp->cport; + s->v4.vport = cp->vport; + s->v4.dport = cp->dport; + s->v4.fwmark = htonl(cp->fwmark); + s->v4.timeout = htonl(cp->timeout / HZ); m->nr_conns++; - m->size += len; - curr_sb->head += len; - /* check if there is a space for next one */ - if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) { - sb_queue_tail(curr_sb); - curr_sb = NULL; +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) { + p += sizeof(struct ip_vs_sync_v6); + ipv6_addr_copy(&s->v6.caddr, &cp->caddr.in6); + ipv6_addr_copy(&s->v6.vaddr, &cp->vaddr.in6); + ipv6_addr_copy(&s->v6.daddr, &cp->daddr.in6); + } else +#endif + { + p += sizeof(struct ip_vs_sync_v4); /* options ptr */ + s->v4.caddr = cp->caddr.ip; + s->v4.vaddr = cp->vaddr.ip; + s->v4.daddr = cp->daddr.ip; + } + if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { + *(p++) = IPVS_OPT_SEQ_DATA; + *(p++) = sizeof(struct ip_vs_sync_conn_options); + hton_seq((struct ip_vs_seq *)p, &cp->in_seq); + p += sizeof(struct 
ip_vs_seq); + hton_seq((struct ip_vs_seq *)p, &cp->out_seq); + p += sizeof(struct ip_vs_seq); } + /* Handle pe data */ + if (cp->pe_data_len && cp->pe_data) { + *(p++) = IPVS_OPT_PE_DATA; + *(p++) = cp->pe_data_len; + memcpy(p, cp->pe_data, cp->pe_data_len); + p += cp->pe_data_len; + if (pe_name_len) { + /* Add PE_NAME */ + *(p++) = IPVS_OPT_PE_NAME; + *(p++) = pe_name_len; + memcpy(p, cp->pe->name, pe_name_len); + p += pe_name_len; + } + } + spin_unlock(&curr_sb_lock); +control: /* synchronize its controller if it has */ - if (cp->control) - ip_vs_sync_conn(cp->control); + cp = cp->control; + if (!cp) + return; + /* + * Reduce sync rate for templates + * i.e only increment in_pkts for Templates. + */ + if (cp->flags & IP_VS_CONN_F_TEMPLATE) { + int pkts = atomic_add_return(1, &cp->in_pkts); + + if (pkts % sysctl_ip_vs_sync_threshold[1] != 1) + return; + } + goto sloop; } /* @@ -596,7 +699,7 @@ static void ip_vs_proc_conn(struct ip_vs_conn_param *param, unsigned flags, */ static void ip_vs_process_message_v0(const char *buffer, const size_t buflen) { - struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer; + struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer; struct ip_vs_sync_conn_v0 *s; struct ip_vs_sync_conn_options *opt; struct ip_vs_protocol *pp; @@ -604,7 +707,7 @@ static void ip_vs_process_message_v0(const char *buffer, const size_t buflen) char *p; int i; - p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); + p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0); for (i=0; inr_conns; i++) { unsigned flags, state; @@ -848,11 +951,11 @@ out: */ static void ip_vs_process_message(__u8 *buffer, const size_t buflen) { - struct ip_vs_sync_mesg_v2 *m2 = (struct ip_vs_sync_mesg_v2 *)buffer; + struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer; __u8 *p, *msg_end; - unsigned int i, nr_conns; + int i, nr_conns; - if (buflen < sizeof(struct ip_vs_sync_mesg)) { + if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) { IP_VS_DBG(2, "BACKUP, 
message header too short\n"); return; } @@ -872,7 +975,7 @@ static void ip_vs_process_message(__u8 *buffer, const size_t buflen) if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0) && (m2->spare == 0)) { - msg_end = buffer + sizeof(struct ip_vs_sync_mesg_v2); + msg_end = buffer + sizeof(struct ip_vs_sync_mesg); nr_conns = m2->nr_conns; for (i=0; i Date: Fri, 19 Nov 2010 14:25:14 +0100 Subject: IPVS: Backup, adding version 0 sending capabilities This patch adds a sysctl net.ipv4.vs.sync_version that can be used to send sync msg in version 0 or 1 format. sync_version value is logical, Value 1 (default) New version 0 Plain old version Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ctl.c | 28 ++++++++- net/netfilter/ipvs/ip_vs_sync.c | 134 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 161 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index a5bd002..d12a13c 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -92,7 +92,7 @@ int sysctl_ip_vs_nat_icmp_send = 0; int sysctl_ip_vs_conntrack; #endif int sysctl_ip_vs_snat_reroute = 1; - +int sysctl_ip_vs_sync_ver = 1; /* Default version of sync proto */ #ifdef CONFIG_IP_VS_DEBUG static int sysctl_ip_vs_debug_level = 0; @@ -1536,6 +1536,25 @@ proc_do_sync_threshold(ctl_table *table, int write, return rc; } +static int +proc_do_sync_mode(ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int *valp = table->data; + int val = *valp; + int rc; + + rc = proc_dointvec(table, write, buffer, lenp, ppos); + if (write && (*valp != val)) { + if ((*valp < 0) || (*valp > 1)) { + /* Restore the correct value */ + *valp = val; + } else { + ip_vs_sync_switch_mode(val); + } + } + return rc; +} /* * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) @@ -1602,6 +1621,13 @@ static struct ctl_table vs_vars[] = { .mode 
= 0644, .proc_handler = &proc_dointvec, }, + { + .procname = "sync_version", + .data = &sysctl_ip_vs_sync_ver, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_do_sync_mode, + }, #if 0 { .procname = "timeout_established", diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index df5abf0..c1c167a 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -5,6 +5,18 @@ * high-performance and highly available server based on a * cluster of servers. * + * Version 1, is capable of handling both version 0 and 1 messages. + * Version 0 is the plain old format. + * Note Version 0 receivers will just drop Ver 1 messages. + * Version 1 is capable of handle IPv6, Persistence data, + * time-outs, and firewall marks. + * In ver.1 "ip_vs_sync_conn_options" will be sent in netw. order. + * Ver. 0 can be turned on by sysctl -w net.ipv4.vs.sync_version=0 + * + * Definitions Message: is a complete datagram + * Sync_conn: is a part of a Message + * Param Data is an option to a Sync_conn. + * * Authors: Wensong Zhang * * ip_vs_sync: sync connection info from master load balancer to backups @@ -15,6 +27,8 @@ * Alexandre Cassen : Added SyncID support for incoming sync * messages filtering. * Justin Ossevoort : Fix endian problem on sync message size. + * Hans Schillstrom : Added Version 1: i.e. IPv6, + * Persistence support, fwmark and time-out. */ #define KMSG_COMPONENT "IPVS" @@ -392,6 +406,121 @@ get_curr_sync_buff(unsigned long time) } /* + * Switch mode from sending version 0 or 1 + * - must handle sync_buf + */ +void ip_vs_sync_switch_mode(int mode) { + + if (!ip_vs_sync_state & IP_VS_STATE_MASTER) + return; + if (mode == sysctl_ip_vs_sync_ver || !curr_sb) + return; + + spin_lock_bh(&curr_sb_lock); + /* Buffer empty ? 
then let buf_create do the job */ + if ( curr_sb->mesg->size <= sizeof(struct ip_vs_sync_mesg)) { + kfree(curr_sb); + curr_sb = NULL; + } else { + spin_lock_bh(&ip_vs_sync_lock); + if (ip_vs_sync_state & IP_VS_STATE_MASTER) + list_add_tail(&curr_sb->list, &ip_vs_sync_queue); + else + ip_vs_sync_buff_release(curr_sb); + spin_unlock_bh(&ip_vs_sync_lock); + } + spin_unlock_bh(&curr_sb_lock); +} + +/* + * Create a new sync buffer for Version 0 proto. + */ +static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create_v0(void) +{ + struct ip_vs_sync_buff *sb; + struct ip_vs_sync_mesg_v0 *mesg; + + if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) + return NULL; + + if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) { + kfree(sb); + return NULL; + } + mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg; + mesg->nr_conns = 0; + mesg->syncid = ip_vs_master_syncid; + mesg->size = 4; + sb->head = (unsigned char *)mesg + 4; + sb->end = (unsigned char *)mesg + sync_send_mesg_maxlen; + sb->firstuse = jiffies; + return sb; +} + +/* + * Version 0 , could be switched in by sys_ctl. + * Add an ip_vs_conn information into the current sync_buff. + */ +void ip_vs_sync_conn_v0(struct ip_vs_conn *cp) +{ + struct ip_vs_sync_mesg_v0 *m; + struct ip_vs_sync_conn_v0 *s; + int len; + + if (unlikely(cp->af != AF_INET)) + return; + /* Do not sync ONE PACKET */ + if (cp->flags & IP_VS_CONN_F_ONE_PACKET) + return; + + spin_lock(&curr_sb_lock); + if (!curr_sb) { + if (!(curr_sb=ip_vs_sync_buff_create_v0())) { + spin_unlock(&curr_sb_lock); + pr_err("ip_vs_sync_buff_create failed.\n"); + return; + } + } + + len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? 
FULL_CONN_SIZE : + SIMPLE_CONN_SIZE; + m = (struct ip_vs_sync_mesg_v0 *)curr_sb->mesg; + s = (struct ip_vs_sync_conn_v0 *)curr_sb->head; + + /* copy members */ + s->reserved = 0; + s->protocol = cp->protocol; + s->cport = cp->cport; + s->vport = cp->vport; + s->dport = cp->dport; + s->caddr = cp->caddr.ip; + s->vaddr = cp->vaddr.ip; + s->daddr = cp->daddr.ip; + s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED); + s->state = htons(cp->state); + if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { + struct ip_vs_sync_conn_options *opt = + (struct ip_vs_sync_conn_options *)&s[1]; + memcpy(opt, &cp->in_seq, sizeof(*opt)); + } + + m->nr_conns++; + m->size += len; + curr_sb->head += len; + + /* check if there is a space for next one */ + if (curr_sb->head + FULL_CONN_SIZE > curr_sb->end) { + sb_queue_tail(curr_sb); + curr_sb = NULL; + } + spin_unlock(&curr_sb_lock); + + /* synchronize its controller if it has */ + if (cp->control) + ip_vs_sync_conn(cp->control); +} + +/* * Add an ip_vs_conn information into the current sync_buff. * Called by ip_vs_in. * Sending Version 1 messages @@ -403,6 +532,11 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp) __u8 *p; unsigned int len, pe_name_len, pad; + /* Handle old version of the protocol */ + if (sysctl_ip_vs_sync_ver == 0) { + ip_vs_sync_conn_v0(cp); + return; + } /* Do not sync ONE PACKET */ if (cp->flags & IP_VS_CONN_F_ONE_PACKET) goto control; -- cgit v1.1 From ae90bdeaeac6b964b7a1e853a90a19f358a9ac20 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Wed, 15 Dec 2010 23:53:41 +0100 Subject: netfilter: fix compilation when conntrack is disabled but tproxy is enabled The IPv6 tproxy patches split IPv6 defragmentation off of conntrack, but failed to update the #ifdef stanzas guarding the defragmentation related fields and code in skbuff and conntrack related code in nf_defrag_ipv6.c. This patch adds the required #ifdefs so that IPv6 tproxy can truly be used without connection tracking. 
Original report: http://marc.info/?l=linux-netdev&m=129010118516341&w=2 Reported-by: Randy Dunlap Signed-off-by: KOVACS Krisztian Acked-by: Randy Dunlap Signed-off-by: Patrick McHardy --- net/core/skbuff.c | 2 ++ net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 104f844..74ebf4b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -380,6 +380,8 @@ static void skb_release_head_state(struct sk_buff *skb) } #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_conntrack_put(skb->nfct); +#endif +#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED nf_conntrack_put_reasm(skb->nfct_reasm); #endif #ifdef CONFIG_BRIDGE_NETFILTER diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index 99abfb5..97c5b21 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -19,13 +19,15 @@ #include #include +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include #include #include #include #include -#include #include +#endif +#include #include static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, @@ -33,8 +35,10 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, { u16 zone = NF_CT_DEFAULT_ZONE; +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (skb->nfct) zone = nf_ct_zone((struct nf_conn *)skb->nfct); +#endif #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge && @@ -56,9 +60,11 @@ static unsigned int ipv6_defrag(unsigned int hooknum, { struct sk_buff *reasm; +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) /* Previously seen (loopback)? 
*/ + if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) return NF_ACCEPT; +#endif reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb)); /* queued */ -- cgit v1.1 From 61b1ab4583e275af216c8454b9256de680499b19 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:42 +0100 Subject: IPVS: netns, add basic init per netns. Preparation for network name-space init, in this stage some empty functions exist. In most files there is a check if it is root ns i.e. init_net if (!net_eq(net, &init_net)) return ... this will be removed by the last patch, when enabling name-space. *v3 ip_vs_conn.c merge error corrected. net_ipvs #ifdef removed as suggested by Jan Engelhardt [ horms@verge.net.au: Removed whitespace-change-only hunks ] Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_app.c | 28 +++++++++++++++--- net/netfilter/ipvs/ip_vs_conn.c | 34 ++++++++++++++++++---- net/netfilter/ipvs/ip_vs_core.c | 63 ++++++++++++++++++++++++++++++++++++++-- net/netfilter/ipvs/ip_vs_ctl.c | 49 +++++++++++++++++++++++++------ net/netfilter/ipvs/ip_vs_est.c | 20 ++++++++++++- net/netfilter/ipvs/ip_vs_ftp.c | 34 +++++++++++++++++++--- net/netfilter/ipvs/ip_vs_lblc.c | 37 +++++++++++++++++++++-- net/netfilter/ipvs/ip_vs_lblcr.c | 38 +++++++++++++++++++++--- net/netfilter/ipvs/ip_vs_proto.c | 19 ++++++++++++ net/netfilter/ipvs/ip_vs_sync.c | 27 +++++++++++++++++ 10 files changed, 316 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index a475ede..40b09cc 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -569,15 +569,35 @@ static const struct file_operations ip_vs_app_fops = { }; #endif -int __init ip_vs_app_init(void) +static int __net_init __ip_vs_app_init(struct net *net) { - /* we will replace it with proc_net_ipvs_create() soon */ - proc_net_fops_create(&init_net, 
"ip_vs_app", 0, &ip_vs_app_fops); + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return -EPERM; + + proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops); return 0; } +static void __net_exit __ip_vs_app_cleanup(struct net *net) +{ + proc_net_remove(net, "ip_vs_app"); +} + +static struct pernet_operations ip_vs_app_ops = { + .init = __ip_vs_app_init, + .exit = __ip_vs_app_cleanup, +}; + +int __init ip_vs_app_init(void) +{ + int rv; + + rv = register_pernet_subsys(&ip_vs_app_ops); + return rv; +} + void ip_vs_app_cleanup(void) { - proc_net_remove(&init_net, "ip_vs_app"); + unregister_pernet_subsys(&ip_vs_app_ops); } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 66e4662..7c1b502 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1201,11 +1201,36 @@ static void ip_vs_conn_flush(void) goto flush_again; } } +/* + * per netns init and exit + */ +int __net_init __ip_vs_conn_init(struct net *net) +{ + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return -EPERM; + proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops); + proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops); + return 0; +} + +static void __net_exit __ip_vs_conn_cleanup(struct net *net) +{ + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return; + + proc_net_remove(net, "ip_vs_conn"); + proc_net_remove(net, "ip_vs_conn_sync"); +} +static struct pernet_operations ipvs_conn_ops = { + .init = __ip_vs_conn_init, + .exit = __ip_vs_conn_cleanup, +}; int __init ip_vs_conn_init(void) { int idx; + int retc; /* Compute size and mask */ ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits; @@ -1243,24 +1268,21 @@ int __init ip_vs_conn_init(void) rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); } - proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops); - proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops); + retc = 
register_pernet_subsys(&ipvs_conn_ops); /* calculate the random value for connection hash */ get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); - return 0; + return retc; } - void ip_vs_conn_cleanup(void) { + unregister_pernet_subsys(&ipvs_conn_ops); /* flush all the connection entries first */ ip_vs_conn_flush(); /* Release the empty cache */ kmem_cache_destroy(ip_vs_conn_cachep); - proc_net_remove(&init_net, "ip_vs_conn"); - proc_net_remove(&init_net, "ip_vs_conn_sync"); vfree(ip_vs_conn_tab); } diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 5287771..206f40c 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -41,6 +41,7 @@ #include /* for icmp_send */ #include #include +#include /* net_generic() */ #include #include @@ -68,6 +69,12 @@ EXPORT_SYMBOL(ip_vs_conn_put); EXPORT_SYMBOL(ip_vs_get_debug_level); #endif +int ip_vs_net_id __read_mostly; +#ifdef IP_VS_GENERIC_NETNS +EXPORT_SYMBOL(ip_vs_net_id); +#endif +/* netns cnt used for uniqueness */ +static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0); /* ID used in ICMP lookups */ #define icmp_id(icmph) (((icmph)->un).echo.id) @@ -1813,6 +1820,44 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { #endif }; +/* + * Initialize IP Virtual Server netns mem. 
+ */ +static int __net_init __ip_vs_init(struct net *net) +{ + struct netns_ipvs *ipvs; + + if (!net_eq(net, &init_net)) { + pr_err("The final patch for enabling netns is missing\n"); + return -EPERM; + } + ipvs = net_generic(net, ip_vs_net_id); + if (ipvs == NULL) { + pr_err("%s(): no memory.\n", __func__); + return -ENOMEM; + } + /* Counters used for creating unique names */ + ipvs->gen = atomic_read(&ipvs_netns_cnt); + atomic_inc(&ipvs_netns_cnt); + net->ipvs = ipvs; + printk(KERN_INFO "IPVS: Creating netns size=%lu id=%d\n", + sizeof(struct netns_ipvs), ipvs->gen); + return 0; +} + +static void __net_exit __ip_vs_cleanup(struct net *net) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + + IP_VS_DBG(10, "ipvs netns %d released\n", ipvs->gen); +} + +static struct pernet_operations ipvs_core_ops = { + .init = __ip_vs_init, + .exit = __ip_vs_cleanup, + .id = &ip_vs_net_id, + .size = sizeof(struct netns_ipvs), +}; /* * Initialize IP Virtual Server @@ -1821,8 +1866,11 @@ static int __init ip_vs_init(void) { int ret; - ip_vs_estimator_init(); + ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */ + if (ret < 0) + return ret; + ip_vs_estimator_init(); ret = ip_vs_control_init(); if (ret < 0) { pr_err("can't setup control.\n"); @@ -1843,15 +1891,23 @@ static int __init ip_vs_init(void) goto cleanup_app; } + ret = ip_vs_sync_init(); + if (ret < 0) { + pr_err("can't setup sync data.\n"); + goto cleanup_conn; + } + ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); if (ret < 0) { pr_err("can't register hooks.\n"); - goto cleanup_conn; + goto cleanup_sync; } pr_info("ipvs loaded.\n"); return ret; +cleanup_sync: + ip_vs_sync_cleanup(); cleanup_conn: ip_vs_conn_cleanup(); cleanup_app: @@ -1861,17 +1917,20 @@ static int __init ip_vs_init(void) ip_vs_control_cleanup(); cleanup_estimator: ip_vs_estimator_cleanup(); + unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ return ret; } static void __exit ip_vs_cleanup(void) { 
nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); + ip_vs_sync_cleanup(); ip_vs_conn_cleanup(); ip_vs_app_cleanup(); ip_vs_protocol_cleanup(); ip_vs_control_cleanup(); ip_vs_estimator_cleanup(); + unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ pr_info("ipvs unloaded.\n"); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index ca49e92..ceeef43 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3406,6 +3406,42 @@ static void ip_vs_genl_unregister(void) /* End of Generic Netlink interface definitions */ +/* + * per netns intit/exit func. + */ +int __net_init __ip_vs_control_init(struct net *net) +{ + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return -EPERM; + + proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops); + proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); + sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path, + vs_vars); + if (sysctl_header == NULL) + goto err_reg; + ip_vs_new_estimator(&ip_vs_stats); + return 0; + +err_reg: + return -ENOMEM; +} + +static void __net_exit __ip_vs_control_cleanup(struct net *net) +{ + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return; + + ip_vs_kill_estimator(&ip_vs_stats); + unregister_net_sysctl_table(sysctl_header); + proc_net_remove(net, "ip_vs_stats"); + proc_net_remove(net, "ip_vs"); +} + +static struct pernet_operations ipvs_control_ops = { + .init = __ip_vs_control_init, + .exit = __ip_vs_control_cleanup, +}; int __init ip_vs_control_init(void) { @@ -3437,12 +3473,9 @@ int __init ip_vs_control_init(void) return ret; } - proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops); - proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops); - - sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars); - - ip_vs_new_estimator(&ip_vs_stats); + ret = register_pernet_subsys(&ipvs_control_ops); + if (ret) + return ret; /* Hook the defense timer */ 
schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); @@ -3459,9 +3492,7 @@ void ip_vs_control_cleanup(void) cancel_delayed_work_sync(&defense_work); cancel_work_sync(&defense_work.work); ip_vs_kill_estimator(&ip_vs_stats); - unregister_sysctl_table(sysctl_header); - proc_net_remove(&init_net, "ip_vs_stats"); - proc_net_remove(&init_net, "ip_vs"); + unregister_pernet_subsys(&ipvs_control_ops); ip_vs_genl_unregister(); nf_unregister_sockopt(&ip_vs_sockopts); LeaveFunction(2); diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index ff28801..7417a0c 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -157,13 +157,31 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats) est->outbps = 0; } +static int __net_init __ip_vs_estimator_init(struct net *net) +{ + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return -EPERM; + + return 0; +} + +static struct pernet_operations ip_vs_app_ops = { + .init = __ip_vs_estimator_init, +}; + int __init ip_vs_estimator_init(void) { + int rv; + + rv = register_pernet_subsys(&ip_vs_app_ops); + if (rv < 0) + return rv; mod_timer(&est_timer, jiffies + 2 * HZ); - return 0; + return rv; } void ip_vs_estimator_cleanup(void) { del_timer_sync(&est_timer); + unregister_pernet_subsys(&ip_vs_app_ops); } diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 84aef65..0e762f3 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -399,15 +399,17 @@ static struct ip_vs_app ip_vs_ftp = { .pkt_in = ip_vs_ftp_in, }; - /* - * ip_vs_ftp initialization + * per netns ip_vs_ftp initialization */ -static int __init ip_vs_ftp_init(void) +static int __net_init __ip_vs_ftp_init(struct net *net) { int i, ret; struct ip_vs_app *app = &ip_vs_ftp; + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return -EPERM; + ret = register_ip_vs_app(app); if (ret) return ret; @@ -427,14 +429,38 @@ static int __init ip_vs_ftp_init(void) 
return ret; } +/* + * netns exit + */ +static void __ip_vs_ftp_exit(struct net *net) +{ + struct ip_vs_app *app = &ip_vs_ftp; + + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return; + + unregister_ip_vs_app(app); +} + +static struct pernet_operations ip_vs_ftp_ops = { + .init = __ip_vs_ftp_init, + .exit = __ip_vs_ftp_exit, +}; +int __init ip_vs_ftp_init(void) +{ + int rv; + + rv = register_pernet_subsys(&ip_vs_ftp_ops); + return rv; +} /* * ip_vs_ftp finish. */ static void __exit ip_vs_ftp_exit(void) { - unregister_ip_vs_app(&ip_vs_ftp); + unregister_pernet_subsys(&ip_vs_ftp_ops); } diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 9323f89..84278fb 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -543,23 +543,54 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler = .schedule = ip_vs_lblc_schedule, }; +/* + * per netns init. + */ +static int __net_init __ip_vs_lblc_init(struct net *net) +{ + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return -EPERM; + + sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path, + vs_vars_table); + if (!sysctl_header) + return -ENOMEM; + + return 0; +} + +static void __net_exit __ip_vs_lblc_exit(struct net *net) +{ + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return; + + unregister_net_sysctl_table(sysctl_header); +} + +static struct pernet_operations ip_vs_lblc_ops = { + .init = __ip_vs_lblc_init, + .exit = __ip_vs_lblc_exit, +}; static int __init ip_vs_lblc_init(void) { int ret; - sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); + ret = register_pernet_subsys(&ip_vs_lblc_ops); + if (ret) + return ret; + ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler); if (ret) - unregister_sysctl_table(sysctl_header); + unregister_pernet_subsys(&ip_vs_lblc_ops); return ret; } static void __exit ip_vs_lblc_cleanup(void) { - unregister_sysctl_table(sysctl_header); 
unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); + unregister_pernet_subsys(&ip_vs_lblc_ops); } diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index dbeed8e..7c7396a 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -744,23 +744,53 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler = .schedule = ip_vs_lblcr_schedule, }; +/* + * per netns init. + */ +static int __net_init __ip_vs_lblcr_init(struct net *net) +{ + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return -EPERM; + + sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path, + vs_vars_table); + if (!sysctl_header) + return -ENOMEM; + + return 0; +} + +static void __net_exit __ip_vs_lblcr_exit(struct net *net) +{ + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return; + + unregister_net_sysctl_table(sysctl_header); +} + +static struct pernet_operations ip_vs_lblcr_ops = { + .init = __ip_vs_lblcr_init, + .exit = __ip_vs_lblcr_exit, +}; static int __init ip_vs_lblcr_init(void) { int ret; - sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); + ret = register_pernet_subsys(&ip_vs_lblcr_ops); + if (ret) + return ret; + ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler); if (ret) - unregister_sysctl_table(sysctl_header); + unregister_pernet_subsys(&ip_vs_lblcr_ops); return ret; } - static void __exit ip_vs_lblcr_cleanup(void) { - unregister_sysctl_table(sysctl_header); unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); + unregister_pernet_subsys(&ip_vs_lblcr_ops); } diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index c539983..4539294 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -236,6 +236,23 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg); } +/* + * per network name-space init + */ +static int __net_init __ip_vs_protocol_init(struct net 
*net) +{ + return 0; +} + +static void __net_exit __ip_vs_protocol_cleanup(struct net *net) +{ + /* empty */ +} + +static struct pernet_operations ipvs_proto_ops = { + .init = __ip_vs_protocol_init, + .exit = __ip_vs_protocol_cleanup, +}; int __init ip_vs_protocol_init(void) { @@ -265,6 +282,7 @@ int __init ip_vs_protocol_init(void) REGISTER_PROTOCOL(&ip_vs_protocol_esp); #endif pr_info("Registered protocols (%s)\n", &protocols[2]); + return register_pernet_subsys(&ipvs_proto_ops); return 0; } @@ -275,6 +293,7 @@ void ip_vs_protocol_cleanup(void) struct ip_vs_protocol *pp; int i; + unregister_pernet_subsys(&ipvs_proto_ops); /* unregister all the ipvs protocols */ for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { while ((pp = ip_vs_proto_table[i]) != NULL) diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index c1c167a..3668739 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1639,3 +1639,30 @@ int stop_sync_thread(int state) return 0; } + +/* + * Initialize data struct for each netns + */ +static int __net_init __ip_vs_sync_init(struct net *net) +{ + return 0; +} + +static void __ip_vs_sync_cleanup(struct net *net) +{ +} +static struct pernet_operations ipvs_sync_ops = { + .init = __ip_vs_sync_init, + .exit = __ip_vs_sync_cleanup, +}; + + +int __init ip_vs_sync_init(void) +{ + return register_pernet_subsys(&ipvs_sync_ops); +} + +void __exit ip_vs_sync_cleanup(void) +{ + unregister_pernet_subsys(&ipvs_sync_ops); +} -- cgit v1.1 From fc723250c9cb046cc19833a2b1c4309bbf59ac36 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:43 +0100 Subject: IPVS: netns to services part 1 Services hash tables got netns ptr a hash arg, While Real Servers (rs) has been moved to ipvs struct. Two new inline functions added to get net ptr from skb. 
Since ip_vs is called from different contexts, there are two places to dig for the net ptr: skb->dev or skb->sk. This is handled in skb_net() and skb_sknet(). Global functions ip_vs_service_get(), ip_vs_lookup_real_service(), etc. have got struct net *net as first param. If possible, get the net ptr from the skb etc.; if not, &init_net is used at this early stage of patching. ip_vs_ctl.c procfs is not ready for netns yet. *v3 Comments by Julian - __ip_vs_service_find and __ip_vs_svc_fwm_find are fast path, so the net_eq(svc->net, net) check is at the end now. - net = skb_net(skb) in ip_vs_out moved after check for skb_dst. Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_conn.c | 2 +- net/netfilter/ipvs/ip_vs_core.c | 4 +- net/netfilter/ipvs/ip_vs_ctl.c | 232 +++++++++++++++++++--------------- net/netfilter/ipvs/ip_vs_proto_sctp.c | 5 +- net/netfilter/ipvs/ip_vs_proto_tcp.c | 7 +- net/netfilter/ipvs/ip_vs_proto_udp.c | 5 +- net/netfilter/ipvs/ip_vs_sync.c | 2 +- 7 files changed, 147 insertions(+), 110 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 7c1b502..7a0e79e 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -611,7 +611,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) struct ip_vs_dest *dest; if ((cp) && (!cp->dest)) { - dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport, + dest = ip_vs_find_dest(&init_net, cp->af, &cp->daddr, cp->dport, &cp->vaddr, cp->vport, cp->protocol, cp->fwmark); ip_vs_bind_dest(cp, dest); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 206f40c..d0616ea 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1031,6 +1031,7 @@ drop: static unsigned int ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) { + struct net *net = NULL; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct
ip_vs_conn *cp; @@ -1054,6 +1055,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) if (unlikely(!skb_dst(skb))) return NF_ACCEPT; + net = skb_net(skb); ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { @@ -1119,7 +1121,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) sizeof(_ports), _ports); if (pptr == NULL) return NF_ACCEPT; /* Not for me */ - if (ip_vs_lookup_real_service(af, iph.protocol, + if (ip_vs_lookup_real_service(net, af, iph.protocol, &iph.saddr, pptr[0])) { /* diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index ceeef43..2d7c96b 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -288,15 +288,6 @@ static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; /* - * Hash table: for real service lookups - */ -#define IP_VS_RTAB_BITS 4 -#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS) -#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) - -static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE]; - -/* * Trash for destinations */ static LIST_HEAD(ip_vs_dest_trash); @@ -311,9 +302,9 @@ static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0); /* * Returns hash value for virtual service */ -static __inline__ unsigned -ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr, - __be16 port) +static inline unsigned +ip_vs_svc_hashkey(struct net *net, int af, unsigned proto, + const union nf_inet_addr *addr, __be16 port) { register unsigned porth = ntohs(port); __be32 addr_fold = addr->ip; @@ -323,6 +314,7 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr, addr_fold = addr->ip6[0]^addr->ip6[1]^ addr->ip6[2]^addr->ip6[3]; #endif + addr_fold ^= ((size_t)net>>8); return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth) & IP_VS_SVC_TAB_MASK; @@ -331,13 +323,13 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union 
nf_inet_addr *addr, /* * Returns hash value of fwmark for virtual service lookup */ -static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark) +static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark) { - return fwmark & IP_VS_SVC_TAB_MASK; + return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK; } /* - * Hashes a service in the ip_vs_svc_table by <proto,addr,port> + * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port> * or in the ip_vs_svc_fwm_table by fwmark. * Should be called with locked tables. */ @@ -353,16 +345,16 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) if (svc->fwmark == 0) { /* - * Hash it by <protocol,addr,port> in ip_vs_svc_table + * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table */ - hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr, - svc->port); + hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol, + &svc->addr, svc->port); list_add(&svc->s_list, &ip_vs_svc_table[hash]); } else { /* - * Hash it by fwmark in ip_vs_svc_fwm_table + * Hash it by fwmark in svc_fwm_table */ - hash = ip_vs_svc_fwm_hashkey(svc->fwmark); + hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark); list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]); } @@ -374,7 +366,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) /* - * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table. + * Unhashes a service from svc_table / svc_fwm_table. * Should be called with locked tables. */ static int ip_vs_svc_unhash(struct ip_vs_service *svc) @@ -386,10 +378,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) } if (svc->fwmark == 0) { - /* Remove it from the ip_vs_svc_table table */ + /* Remove it from the svc_table table */ list_del(&svc->s_list); } else { - /* Remove it from the ip_vs_svc_fwm_table table */ + /* Remove it from the svc_fwm_table table */ list_del(&svc->f_list); } @@ -400,23 +392,24 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) /* - * Get service by {proto,addr,port} in the service table. + * Get service by {netns, proto,addr,port} in the service table.
*/ static inline struct ip_vs_service * -__ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr, - __be16 vport) +__ip_vs_service_find(struct net *net, int af, __u16 protocol, + const union nf_inet_addr *vaddr, __be16 vport) { unsigned hash; struct ip_vs_service *svc; /* Check for "full" addressed entries */ - hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport); + hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport); list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ if ((svc->af == af) && ip_vs_addr_equal(af, &svc->addr, vaddr) && (svc->port == vport) - && (svc->protocol == protocol)) { + && (svc->protocol == protocol) + && net_eq(svc->net, net)) { /* HIT */ return svc; } @@ -430,16 +423,17 @@ __ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr, * Get service by {fwmark} in the service table. */ static inline struct ip_vs_service * -__ip_vs_svc_fwm_find(int af, __u32 fwmark) +__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark) { unsigned hash; struct ip_vs_service *svc; /* Check for fwmark addressed entries */ - hash = ip_vs_svc_fwm_hashkey(fwmark); + hash = ip_vs_svc_fwm_hashkey(net, fwmark); list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { - if (svc->fwmark == fwmark && svc->af == af) { + if (svc->fwmark == fwmark && svc->af == af + && net_eq(svc->net, net)) { /* HIT */ return svc; } @@ -449,7 +443,7 @@ __ip_vs_svc_fwm_find(int af, __u32 fwmark) } struct ip_vs_service * -ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, +ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, const union nf_inet_addr *vaddr, __be16 vport) { struct ip_vs_service *svc; @@ -459,14 +453,15 @@ ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, /* * Check the table hashed by fwmark first */ - if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark))) + svc = __ip_vs_svc_fwm_find(net, af, fwmark); + if (fwmark && svc) goto out; /* * Check the table hashed by * for "full" addressed 
entries */ - svc = __ip_vs_service_find(af, protocol, vaddr, vport); + svc = __ip_vs_service_find(net, af, protocol, vaddr, vport); if (svc == NULL && protocol == IPPROTO_TCP @@ -476,7 +471,7 @@ ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, * Check if ftp service entry exists, the packet * might belong to FTP data connections. */ - svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT); + svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT); } if (svc == NULL @@ -484,7 +479,7 @@ ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, /* * Check if the catch-all port (port zero) exists */ - svc = __ip_vs_service_find(af, protocol, vaddr, 0); + svc = __ip_vs_service_find(net, af, protocol, vaddr, 0); } out: @@ -545,10 +540,10 @@ static inline unsigned ip_vs_rs_hashkey(int af, } /* - * Hashes ip_vs_dest in ip_vs_rtable by . + * Hashes ip_vs_dest in rs_table by . * should be called with locked tables. */ -static int ip_vs_rs_hash(struct ip_vs_dest *dest) +static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) { unsigned hash; @@ -562,19 +557,19 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest) */ hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); - list_add(&dest->d_list, &ip_vs_rtable[hash]); + list_add(&dest->d_list, &ipvs->rs_table[hash]); return 1; } /* - * UNhashes ip_vs_dest from ip_vs_rtable. + * UNhashes ip_vs_dest from rs_table. * should be called with locked tables. */ static int ip_vs_rs_unhash(struct ip_vs_dest *dest) { /* - * Remove it from the ip_vs_rtable table. + * Remove it from the rs_table table. */ if (!list_empty(&dest->d_list)) { list_del(&dest->d_list); @@ -588,10 +583,11 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest) * Lookup real service by in the real service table. 
*/ struct ip_vs_dest * -ip_vs_lookup_real_service(int af, __u16 protocol, +ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, const union nf_inet_addr *daddr, __be16 dport) { + struct netns_ipvs *ipvs = net_ipvs(net); unsigned hash; struct ip_vs_dest *dest; @@ -602,7 +598,7 @@ ip_vs_lookup_real_service(int af, __u16 protocol, hash = ip_vs_rs_hashkey(af, daddr, dport); read_lock(&__ip_vs_rs_lock); - list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) { + list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) { if ((dest->af == af) && ip_vs_addr_equal(af, &dest->addr, daddr) && (dest->port == dport) @@ -652,7 +648,8 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, * ip_vs_lookup_real_service() looked promissing, but * seems not working as expected. */ -struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr, +struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, + const union nf_inet_addr *daddr, __be16 dport, const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol, __u32 fwmark) @@ -660,7 +657,7 @@ struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr, struct ip_vs_dest *dest; struct ip_vs_service *svc; - svc = ip_vs_service_get(af, fwmark, protocol, vaddr, vport); + svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport); if (!svc) return NULL; dest = ip_vs_lookup_dest(svc, daddr, dport); @@ -768,6 +765,7 @@ static void __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest, int add) { + struct netns_ipvs *ipvs = net_ipvs(svc->net); int conn_flags; /* set the weight and the flags */ @@ -780,11 +778,11 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, conn_flags |= IP_VS_CONN_F_NOOUTPUT; } else { /* - * Put the real service in ip_vs_rtable if not present. + * Put the real service in rs_table if not present. * For now only for NAT! 
*/ write_lock_bh(&__ip_vs_rs_lock); - ip_vs_rs_hash(dest); + ip_vs_rs_hash(ipvs, dest); write_unlock_bh(&__ip_vs_rs_lock); } atomic_set(&dest->conn_flags, conn_flags); @@ -1117,7 +1115,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) * Add a service into the service hash table */ static int -ip_vs_add_service(struct ip_vs_service_user_kern *u, +ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, struct ip_vs_service **svc_p) { int ret = 0; @@ -1172,6 +1170,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, svc->flags = u->flags; svc->timeout = u->timeout * HZ; svc->netmask = u->netmask; + svc->net = net; INIT_LIST_HEAD(&svc->destinations); rwlock_init(&svc->sched_lock); @@ -1428,17 +1427,19 @@ static int ip_vs_del_service(struct ip_vs_service *svc) /* * Flush all the virtual services */ -static int ip_vs_flush(void) +static int ip_vs_flush(struct net *net) { int idx; struct ip_vs_service *svc, *nxt; /* - * Flush the service table hashed by + * Flush the service table hashed by */ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) { - ip_vs_unlink_service(svc); + list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], + s_list) { + if (net_eq(svc->net, net)) + ip_vs_unlink_service(svc); } } @@ -1448,7 +1449,8 @@ static int ip_vs_flush(void) for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry_safe(svc, nxt, &ip_vs_svc_fwm_table[idx], f_list) { - ip_vs_unlink_service(svc); + if (net_eq(svc->net, net)) + ip_vs_unlink_service(svc); } } @@ -1472,20 +1474,22 @@ static int ip_vs_zero_service(struct ip_vs_service *svc) return 0; } -static int ip_vs_zero_all(void) +static int ip_vs_zero_all(struct net *net) { int idx; struct ip_vs_service *svc; for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { - ip_vs_zero_service(svc); + if (net_eq(svc->net, net)) + ip_vs_zero_service(svc); } } 
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { - ip_vs_zero_service(svc); + if (net_eq(svc->net, net)) + ip_vs_zero_service(svc); } } @@ -1763,6 +1767,7 @@ static struct ctl_table_header * sysctl_header; #ifdef CONFIG_PROC_FS struct ip_vs_iter { + struct seq_net_private p; /* Do not move this, netns depends upon it*/ struct list_head *table; int bucket; }; @@ -1789,6 +1794,7 @@ static inline const char *ip_vs_fwd_name(unsigned flags) /* Get the Nth entry in the two lists */ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) { + struct net *net = seq_file_net(seq); struct ip_vs_iter *iter = seq->private; int idx; struct ip_vs_service *svc; @@ -1796,7 +1802,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) /* look in hash by protocol */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { - if (pos-- == 0){ + if (net_eq(svc->net, net) && pos-- == 0) { iter->table = ip_vs_svc_table; iter->bucket = idx; return svc; @@ -1807,7 +1813,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) /* keep looking in fwmark */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { - if (pos-- == 0) { + if (net_eq(svc->net, net) && pos-- == 0) { iter->table = ip_vs_svc_fwm_table; iter->bucket = idx; return svc; @@ -1961,7 +1967,7 @@ static const struct seq_operations ip_vs_info_seq_ops = { static int ip_vs_info_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &ip_vs_info_seq_ops, + return seq_open_net(inode, file, &ip_vs_info_seq_ops, sizeof(struct ip_vs_iter)); } @@ -2011,7 +2017,7 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v) static int ip_vs_stats_seq_open(struct inode *inode, struct file *file) { - return single_open(file, ip_vs_stats_show, NULL); + return 
single_open_net(inode, file, ip_vs_stats_show); } static const struct file_operations ip_vs_stats_fops = { @@ -2113,6 +2119,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, static int do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) { + struct net *net = sock_net(sk); int ret; unsigned char arg[MAX_ARG_LEN]; struct ip_vs_service_user *usvc_compat; @@ -2147,7 +2154,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (cmd == IP_VS_SO_SET_FLUSH) { /* Flush the virtual service */ - ret = ip_vs_flush(); + ret = ip_vs_flush(net); goto out_unlock; } else if (cmd == IP_VS_SO_SET_TIMEOUT) { /* Set timeout values for (tcp tcpfin udp) */ @@ -2174,7 +2181,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (cmd == IP_VS_SO_SET_ZERO) { /* if no service address is set, zero counters in all */ if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) { - ret = ip_vs_zero_all(); + ret = ip_vs_zero_all(net); goto out_unlock; } } @@ -2191,10 +2198,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) /* Lookup the exact service by or fwmark */ if (usvc.fwmark == 0) - svc = __ip_vs_service_find(usvc.af, usvc.protocol, + svc = __ip_vs_service_find(net, usvc.af, usvc.protocol, &usvc.addr, usvc.port); else - svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark); + svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark); if (cmd != IP_VS_SO_SET_ADD && (svc == NULL || svc->protocol != usvc.protocol)) { @@ -2207,7 +2214,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (svc != NULL) ret = -EEXIST; else - ret = ip_vs_add_service(&usvc, &svc); + ret = ip_vs_add_service(net, &usvc, &svc); break; case IP_VS_SO_SET_EDIT: ret = ip_vs_edit_service(svc, &usvc); @@ -2267,7 +2274,8 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) } static inline int -__ip_vs_get_service_entries(const 
struct ip_vs_get_services *get, +__ip_vs_get_service_entries(struct net *net, + const struct ip_vs_get_services *get, struct ip_vs_get_services __user *uptr) { int idx, count=0; @@ -2278,7 +2286,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get, for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { /* Only expose IPv4 entries to old interface */ - if (svc->af != AF_INET) + if (svc->af != AF_INET || !net_eq(svc->net, net)) continue; if (count >= get->num_services) @@ -2297,7 +2305,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get, for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { /* Only expose IPv4 entries to old interface */ - if (svc->af != AF_INET) + if (svc->af != AF_INET || !net_eq(svc->net, net)) continue; if (count >= get->num_services) @@ -2317,7 +2325,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get, } static inline int -__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, +__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, struct ip_vs_get_dests __user *uptr) { struct ip_vs_service *svc; @@ -2325,9 +2333,9 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, int ret = 0; if (get->fwmark) - svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark); + svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark); else - svc = __ip_vs_service_find(AF_INET, get->protocol, &addr, + svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr, get->port); if (svc) { @@ -2401,7 +2409,9 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) unsigned char arg[128]; int ret = 0; unsigned int copylen; + struct net *net = sock_net(sk); + BUG_ON(!net); if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -2463,7 +2473,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EINVAL; goto out; } - ret = 
__ip_vs_get_service_entries(get, user); + ret = __ip_vs_get_service_entries(net, get, user); } break; @@ -2476,10 +2486,11 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) entry = (struct ip_vs_service_entry *)arg; addr.ip = entry->addr; if (entry->fwmark) - svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark); + svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark); else - svc = __ip_vs_service_find(AF_INET, entry->protocol, - &addr, entry->port); + svc = __ip_vs_service_find(net, AF_INET, + entry->protocol, &addr, + entry->port); if (svc) { ip_vs_copy_service(entry, svc); if (copy_to_user(user, entry, sizeof(*entry)) != 0) @@ -2502,7 +2513,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EINVAL; goto out; } - ret = __ip_vs_get_dest_entries(get, user); + ret = __ip_vs_get_dest_entries(net, get, user); } break; @@ -2722,11 +2733,12 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, int idx = 0, i; int start = cb->args[0]; struct ip_vs_service *svc; + struct net *net = skb_sknet(skb); mutex_lock(&__ip_vs_mutex); for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { - if (++idx <= start) + if (++idx <= start || !net_eq(svc->net, net)) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { idx--; @@ -2737,7 +2749,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { - if (++idx <= start) + if (++idx <= start || !net_eq(svc->net, net)) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { idx--; @@ -2753,7 +2765,8 @@ nla_put_failure: return skb->len; } -static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, +static int ip_vs_genl_parse_service(struct net *net, + struct ip_vs_service_user_kern *usvc, struct nlattr *nla, int full_entry, struct ip_vs_service **ret_svc) { @@ -2796,9 +2809,9 @@ static int 
ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, } if (usvc->fwmark) - svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark); + svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark); else - svc = __ip_vs_service_find(usvc->af, usvc->protocol, + svc = __ip_vs_service_find(net, usvc->af, usvc->protocol, &usvc->addr, usvc->port); *ret_svc = svc; @@ -2835,13 +2848,14 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, return 0; } -static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla) +static struct ip_vs_service *ip_vs_genl_find_service(struct net *net, + struct nlattr *nla) { struct ip_vs_service_user_kern usvc; struct ip_vs_service *svc; int ret; - ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc); + ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc); return ret ? ERR_PTR(ret) : svc; } @@ -2909,6 +2923,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, struct ip_vs_service *svc; struct ip_vs_dest *dest; struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; + struct net *net; mutex_lock(&__ip_vs_mutex); @@ -2917,7 +2932,8 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy)) goto out_err; - svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]); + net = skb_sknet(skb); + svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]); if (IS_ERR(svc) || svc == NULL) goto out_err; @@ -3102,13 +3118,15 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) struct ip_vs_dest_user_kern udest; int ret = 0, cmd; int need_full_svc = 0, need_full_dest = 0; + struct net *net; + net = skb_sknet(skb); cmd = info->genlhdr->cmd; mutex_lock(&__ip_vs_mutex); if (cmd == IPVS_CMD_FLUSH) { - ret = ip_vs_flush(); + ret = ip_vs_flush(net); goto out; } else if (cmd == IPVS_CMD_SET_CONFIG) { ret = ip_vs_genl_set_config(info->attrs); @@ -3133,7 +3151,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) goto out; } else if (cmd == 
IPVS_CMD_ZERO && !info->attrs[IPVS_CMD_ATTR_SERVICE]) { - ret = ip_vs_zero_all(); + ret = ip_vs_zero_all(net); goto out; } @@ -3143,7 +3161,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) need_full_svc = 1; - ret = ip_vs_genl_parse_service(&usvc, + ret = ip_vs_genl_parse_service(net, &usvc, info->attrs[IPVS_CMD_ATTR_SERVICE], need_full_svc, &svc); if (ret) @@ -3173,7 +3191,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) switch (cmd) { case IPVS_CMD_NEW_SERVICE: if (svc == NULL) - ret = ip_vs_add_service(&usvc, &svc); + ret = ip_vs_add_service(net, &usvc, &svc); else ret = -EEXIST; break; @@ -3211,7 +3229,9 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) struct sk_buff *msg; void *reply; int ret, cmd, reply_cmd; + struct net *net; + net = skb_sknet(skb); cmd = info->genlhdr->cmd; if (cmd == IPVS_CMD_GET_SERVICE) @@ -3240,7 +3260,8 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) { struct ip_vs_service *svc; - svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]); + svc = ip_vs_genl_find_service(net, + info->attrs[IPVS_CMD_ATTR_SERVICE]); if (IS_ERR(svc)) { ret = PTR_ERR(svc); goto out_err; @@ -3411,9 +3432,15 @@ static void ip_vs_genl_unregister(void) */ int __net_init __ip_vs_control_init(struct net *net) { + int idx; + struct netns_ipvs *ipvs = net_ipvs(net); + if (!net_eq(net, &init_net)) /* netns not enabled yet */ return -EPERM; + for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) + INIT_LIST_HEAD(&ipvs->rs_table[idx]); + proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops); proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path, @@ -3445,43 +3472,48 @@ static struct pernet_operations ipvs_control_ops = { int __init ip_vs_control_init(void) { - int ret; int idx; + int ret; EnterFunction(2); - /* 
Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */ + /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { INIT_LIST_HEAD(&ip_vs_svc_table[idx]); INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); } - for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) { - INIT_LIST_HEAD(&ip_vs_rtable[idx]); + + ret = register_pernet_subsys(&ipvs_control_ops); + if (ret) { + pr_err("cannot register namespace.\n"); + goto err; } - smp_wmb(); + + smp_wmb(); /* Do we really need it now ? */ ret = nf_register_sockopt(&ip_vs_sockopts); if (ret) { pr_err("cannot register sockopt.\n"); - return ret; + goto err_net; } ret = ip_vs_genl_register(); if (ret) { pr_err("cannot register Generic Netlink interface.\n"); nf_unregister_sockopt(&ip_vs_sockopts); - return ret; + goto err_net; } - ret = register_pernet_subsys(&ipvs_control_ops); - if (ret) - return ret; - /* Hook the defense timer */ schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); LeaveFunction(2); return 0; + +err_net: + unregister_pernet_subsys(&ipvs_control_ops); +err: + return ret; } diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index a315159..521b827 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -12,6 +12,7 @@ static int sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) { + struct net *net; struct ip_vs_service *svc; sctp_chunkhdr_t _schunkh, *sch; sctp_sctphdr_t *sh, _sctph; @@ -27,9 +28,9 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, sizeof(_schunkh), &_schunkh); if (sch == NULL) return 0; - + net = skb_net(skb); if ((sch->type == SCTP_CID_INIT) && - (svc = ip_vs_service_get(af, skb->mark, iph.protocol, + (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, &iph.daddr, sh->dest))) { int ignored; diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c 
b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 1cdab12..c175d31 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -31,6 +31,7 @@ static int tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) { + struct net *net; struct ip_vs_service *svc; struct tcphdr _tcph, *th; struct ip_vs_iphdr iph; @@ -42,11 +43,11 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, *verdict = NF_DROP; return 0; } - + net = skb_net(skb); /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ if (th->syn && - (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr, - th->dest))) { + (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, + &iph.daddr, th->dest))) { int ignored; if (ip_vs_todrop()) { diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index cd398de..5ab54f6 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -31,6 +31,7 @@ static int udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) { + struct net *net; struct ip_vs_service *svc; struct udphdr _udph, *uh; struct ip_vs_iphdr iph; @@ -42,8 +43,8 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, *verdict = NF_DROP; return 0; } - - svc = ip_vs_service_get(af, skb->mark, iph.protocol, + net = skb_net(skb); + svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, &iph.daddr, uh->dest); if (svc) { int ignored; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 3668739..662aa2c 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -749,7 +749,7 @@ static void ip_vs_proc_conn(struct ip_vs_conn_param *param, unsigned flags, * If it is not found the connection will remain unbound * but still handled. 
*/ - dest = ip_vs_find_dest(type, daddr, dport, param->vaddr, + dest = ip_vs_find_dest(&init_net, type, daddr, dport, param->vaddr, param->vport, protocol, fwmark); /* Set the approprite ativity flag */ -- cgit v1.1 From d0a1eef9c38218af20c809b2220a960b7ed81a36 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:44 +0100 Subject: IPVS: netns awareness to lblcr scheduler var sysctl_ip_vs_lblcr_expiration moved to ipvs struct as sysctl_lblcr_expiration; procfs updated to handle this. Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_lblcr.c | 54 ++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 7c7396a..61ae8cf 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -70,8 +70,6 @@ * entries that haven't been touched for a day. */ #define COUNT_FOR_FULL_EXPIRATION 30 -static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ; - /* * for IPVS lblcr entry hash table @@ -296,7 +294,7 @@ struct ip_vs_lblcr_table { static ctl_table vs_vars_table[] = { { .procname = "lblcr_expiration", - .data = &sysctl_ip_vs_lblcr_expiration, + .data = NULL, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -304,8 +302,6 @@ static ctl_table vs_vars_table[] = { { } }; -static struct ctl_table_header * sysctl_header; - static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) { list_del(&en->list); @@ -425,14 +421,15 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) unsigned long now = jiffies; int i, j; struct ip_vs_lblcr_entry *en, *nxt; + struct netns_ipvs *ipvs = net_ipvs(svc->net); for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLCR_TAB_MASK; write_lock(&svc->sched_lock); list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { - if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration, - now)) + if
(time_after(en->lastuse + + ipvs->sysctl_lblcr_expiration, now)) continue; ip_vs_lblcr_free(en); @@ -664,6 +661,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) read_lock(&svc->sched_lock); en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr); if (en) { + struct netns_ipvs *ipvs = net_ipvs(svc->net); /* We only hold a read lock, but this is atomic */ en->lastuse = jiffies; @@ -675,7 +673,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) /* More than one destination + enough time passed by, cleanup */ if (atomic_read(&en->set.size) > 1 && time_after(jiffies, en->set.lastmod + - sysctl_ip_vs_lblcr_expiration)) { + ipvs->sysctl_lblcr_expiration)) { struct ip_vs_dest *m; write_lock(&en->set.lock); @@ -749,23 +747,43 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler = */ static int __net_init __ip_vs_lblcr_init(struct net *net) { - if (!net_eq(net, &init_net)) /* netns not enabled yet */ - return -EPERM; - - sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path, - vs_vars_table); - if (!sysctl_header) - return -ENOMEM; + struct netns_ipvs *ipvs = net_ipvs(net); + + if (!net_eq(net, &init_net)) { + ipvs->lblcr_ctl_table = kmemdup(vs_vars_table, + sizeof(vs_vars_table), + GFP_KERNEL); + if (ipvs->lblcr_ctl_table == NULL) + goto err_dup; + } else + ipvs->lblcr_ctl_table = vs_vars_table; + ipvs->sysctl_lblcr_expiration = 24*60*60*HZ; + ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration; + + ipvs->lblcr_ctl_header = + register_net_sysctl_table(net, net_vs_ctl_path, + ipvs->lblcr_ctl_table); + if (!ipvs->lblcr_ctl_header) + goto err_reg; return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(ipvs->lblcr_ctl_table); + +err_dup: + return -ENOMEM; } static void __net_exit __ip_vs_lblcr_exit(struct net *net) { - if (!net_eq(net, &init_net)) /* netns not enabled yet */ - return; + struct netns_ipvs *ipvs = net_ipvs(net); + + unregister_net_sysctl_table(ipvs->lblcr_ctl_header); - 
unregister_net_sysctl_table(sysctl_header); + if (!net_eq(net, &init_net)) + kfree(ipvs->lblcr_ctl_table); } static struct pernet_operations ip_vs_lblcr_ops = { -- cgit v1.1 From b6e885ddb903e681b7cbb4e68ad775154660e1f4 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:45 +0100 Subject: IPVS: netns awarness to lblc sheduler var sysctl_ip_vs_lblc_expiration moved to ipvs struct as sysctl_lblc_expiration procfs updated to handle this. Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_lblc.c | 50 ++++++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 84278fb..d5bec33 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -70,7 +70,6 @@ * entries that haven't been touched for a day. */ #define COUNT_FOR_FULL_EXPIRATION 30 -static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ; /* @@ -117,7 +116,7 @@ struct ip_vs_lblc_table { static ctl_table vs_vars_table[] = { { .procname = "lblc_expiration", - .data = &sysctl_ip_vs_lblc_expiration, + .data = NULL, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -125,8 +124,6 @@ static ctl_table vs_vars_table[] = { { } }; -static struct ctl_table_header * sysctl_header; - static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) { list_del(&en->list); @@ -248,6 +245,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) struct ip_vs_lblc_entry *en, *nxt; unsigned long now = jiffies; int i, j; + struct netns_ipvs *ipvs = net_ipvs(svc->net); for (i=0, j=tbl->rover; isched_lock); list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { if (time_before(now, - en->lastuse + sysctl_ip_vs_lblc_expiration)) + en->lastuse + + ipvs->sysctl_lblc_expiration)) continue; ip_vs_lblc_free(en); @@ -548,23 +547,43 @@ static struct 
ip_vs_scheduler ip_vs_lblc_scheduler = */ static int __net_init __ip_vs_lblc_init(struct net *net) { - if (!net_eq(net, &init_net)) /* netns not enabled yet */ - return -EPERM; - - sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path, - vs_vars_table); - if (!sysctl_header) - return -ENOMEM; + struct netns_ipvs *ipvs = net_ipvs(net); + + if (!net_eq(net, &init_net)) { + ipvs->lblc_ctl_table = kmemdup(vs_vars_table, + sizeof(vs_vars_table), + GFP_KERNEL); + if (ipvs->lblc_ctl_table == NULL) + goto err_dup; + } else + ipvs->lblc_ctl_table = vs_vars_table; + ipvs->sysctl_lblc_expiration = 24*60*60*HZ; + ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration; + + ipvs->lblc_ctl_header = + register_net_sysctl_table(net, net_vs_ctl_path, + ipvs->lblc_ctl_table); + if (!ipvs->lblc_ctl_header) + goto err_reg; return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(ipvs->lblc_ctl_table); + +err_dup: + return -ENOMEM; } static void __net_exit __ip_vs_lblc_exit(struct net *net) { - if (!net_eq(net, &init_net)) /* netns not enabled yet */ - return; + struct netns_ipvs *ipvs = net_ipvs(net); + + unregister_net_sysctl_table(ipvs->lblc_ctl_header); - unregister_net_sysctl_table(sysctl_header); + if (!net_eq(net, &init_net)) + kfree(ipvs->lblc_ctl_table); } static struct pernet_operations ip_vs_lblc_ops = { @@ -586,7 +605,6 @@ static int __init ip_vs_lblc_init(void) return ret; } - static void __exit ip_vs_lblc_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); -- cgit v1.1 From 252c64103237f1841088f0f29b4f084b1c774546 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:46 +0100 Subject: IPVS: netns, prepare protocol Add support for protocol data per name-space. in struct ip_vs_protocol, appcnt will be removed when all protos are modified for network name-space. This patch causes warnings of unused functions, they will be used when next patch will be applied. 
Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_proto.c | 66 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 4539294..576e296 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -60,6 +60,31 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp) return 0; } +/* + * register an ipvs protocols netns related data + */ +static int +register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + unsigned hash = IP_VS_PROTO_HASH(pp->protocol); + struct ip_vs_proto_data *pd = + kzalloc(sizeof(struct ip_vs_proto_data), GFP_ATOMIC); + + if (!pd) { + pr_err("%s(): no memory.\n", __func__); + return -ENOMEM; + } + pd->pp = pp; /* For speed issues */ + pd->next = ipvs->proto_data_table[hash]; + ipvs->proto_data_table[hash] = pd; + atomic_set(&pd->appcnt, 0); /* Init app counter */ + + if (pp->init_netns != NULL) + pp->init_netns(net, pd); + + return 0; +} /* * unregister an ipvs protocol @@ -82,6 +107,29 @@ static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp) return -ESRCH; } +/* + * unregister an ipvs protocols netns data + */ +static int +unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data **pd_p; + unsigned hash = IP_VS_PROTO_HASH(pd->pp->protocol); + + pd_p = &ipvs->proto_data_table[hash]; + for (; *pd_p; pd_p = &(*pd_p)->next) { + if (*pd_p == pd) { + *pd_p = pd->next; + if (pd->pp->exit_netns != NULL) + pd->pp->exit_netns(net, pd); + kfree(pd); + return 0; + } + } + + return -ESRCH; +} /* * get ip_vs_protocol object by its proto. 
@@ -100,6 +148,24 @@ struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto) } EXPORT_SYMBOL(ip_vs_proto_get); +/* + * get ip_vs_protocol object data by netns and proto + */ +struct ip_vs_proto_data * +ip_vs_proto_data_get(struct net *net, unsigned short proto) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data *pd; + unsigned hash = IP_VS_PROTO_HASH(proto); + + for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) { + if (pd->pp->protocol == proto) + return pd; + } + + return NULL; +} +EXPORT_SYMBOL(ip_vs_proto_data_get); /* * Propagate event for state change to all protocols -- cgit v1.1 From 4a85b96c08ef84076f84e87280223a4301988ed9 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:47 +0100 Subject: IPVS: netns preparation for proto_tcp In this phase (one), all local vars will be moved to ipvs struct. Remaining work, add param struct net *net to a couple of functions that is common for all protos and use all ip_vs_proto_data *v3 Removed unused function as sugested by Simon Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ftp.c | 8 ++- net/netfilter/ipvs/ip_vs_proto.c | 13 ++++- net/netfilter/ipvs/ip_vs_proto_tcp.c | 97 +++++++++++++++++++----------------- 3 files changed, 70 insertions(+), 48 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 0e762f3..b38ae94 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -157,6 +157,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, int ret = 0; enum ip_conntrack_info ctinfo; struct nf_conn *ct; + struct net *net; #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, @@ -257,8 +258,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, * would be adjusted twice. 
*/ + net = skb_net(skb); cp->app_data = NULL; - ip_vs_tcp_conn_listen(n_cp); + ip_vs_tcp_conn_listen(net, n_cp); ip_vs_conn_put(n_cp); return ret; } @@ -287,6 +289,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, union nf_inet_addr to; __be16 port; struct ip_vs_conn *n_cp; + struct net *net; #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, @@ -378,7 +381,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, /* * Move tunnel to listen state */ - ip_vs_tcp_conn_listen(n_cp); + net = skb_net(skb); + ip_vs_tcp_conn_listen(net, n_cp); ip_vs_conn_put(n_cp); return 1; diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 576e296..320c6a6 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -307,12 +307,23 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, */ static int __net_init __ip_vs_protocol_init(struct net *net) { +#ifdef CONFIG_IP_VS_PROTO_TCP + register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp); +#endif return 0; } static void __net_exit __ip_vs_protocol_cleanup(struct net *net) { - /* empty */ + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data *pd; + int i; + + /* unregister all the ipvs proto data for this netns */ + for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { + while ((pd = ipvs->proto_data_table[i]) != NULL) + unregister_ip_vs_proto_netns(net, pd); + } } static struct pernet_operations ipvs_proto_ops = { diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index c175d31..9d9df3d 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -9,8 +9,12 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Changes: + * Changes: Hans Schillstrom * + * Network name space (netns) aware. 
+ * Global data moved to netns i.e struct netns_ipvs + * tcp_timeouts table has copy per netns in a hash table per + * protocol ip_vs_proto_data and is handled by netns */ #define KMSG_COMPONENT "IPVS" @@ -345,7 +349,7 @@ static const int tcp_state_off[IP_VS_DIR_LAST] = { /* * Timeout table[state] */ -static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = { +static const int tcp_timeouts[IP_VS_TCP_S_LAST+1] = { [IP_VS_TCP_S_NONE] = 2*HZ, [IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ, [IP_VS_TCP_S_SYN_SENT] = 2*60*HZ, @@ -460,13 +464,6 @@ static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags) tcp_state_table = (on? tcp_states_dos : tcp_states); } -static int -tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) -{ - return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST, - tcp_state_name_table, sname, to); -} - static inline int tcp_state_idx(struct tcphdr *th) { if (th->rst) @@ -487,6 +484,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, int state_idx; int new_state = IP_VS_TCP_S_CLOSE; int state_off = tcp_state_off[direction]; + struct ip_vs_proto_data *pd; /* Temp fix */ /* * Update state offset to INPUT_ONLY if necessary @@ -542,10 +540,13 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, } } - cp->timeout = pp->timeout_table[cp->state = new_state]; + pd = ip_vs_proto_data_get(&init_net, pp->protocol); + if (likely(pd)) + cp->timeout = pd->timeout_table[cp->state = new_state]; + else /* What to do ? 
*/ + cp->timeout = tcp_timeouts[cp->state = new_state]; } - /* * Handle state transitions */ @@ -573,17 +574,6 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction, return 1; } - -/* - * Hash table for TCP application incarnations - */ -#define TCP_APP_TAB_BITS 4 -#define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS) -#define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1) - -static struct list_head tcp_apps[TCP_APP_TAB_SIZE]; -static DEFINE_SPINLOCK(tcp_app_lock); - static inline __u16 tcp_app_hashkey(__be16 port) { return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port) @@ -597,21 +587,23 @@ static int tcp_register_app(struct ip_vs_app *inc) __u16 hash; __be16 port = inc->port; int ret = 0; + struct netns_ipvs *ipvs = net_ipvs(&init_net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP); hash = tcp_app_hashkey(port); - spin_lock_bh(&tcp_app_lock); - list_for_each_entry(i, &tcp_apps[hash], p_list) { + spin_lock_bh(&ipvs->tcp_app_lock); + list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } - list_add(&inc->p_list, &tcp_apps[hash]); - atomic_inc(&ip_vs_protocol_tcp.appcnt); + list_add(&inc->p_list, &ipvs->tcp_apps[hash]); + atomic_inc(&pd->pp->appcnt); out: - spin_unlock_bh(&tcp_app_lock); + spin_unlock_bh(&ipvs->tcp_app_lock); return ret; } @@ -619,16 +611,20 @@ static int tcp_register_app(struct ip_vs_app *inc) static void tcp_unregister_app(struct ip_vs_app *inc) { - spin_lock_bh(&tcp_app_lock); - atomic_dec(&ip_vs_protocol_tcp.appcnt); + struct netns_ipvs *ipvs = net_ipvs(&init_net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP); + + spin_lock_bh(&ipvs->tcp_app_lock); + atomic_dec(&pd->pp->appcnt); list_del(&inc->p_list); - spin_unlock_bh(&tcp_app_lock); + spin_unlock_bh(&ipvs->tcp_app_lock); } static int tcp_app_conn_bind(struct ip_vs_conn *cp) { + struct netns_ipvs *ipvs = net_ipvs(&init_net); int hash; struct ip_vs_app *inc; int result = 
0; @@ -640,12 +636,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = tcp_app_hashkey(cp->vport); - spin_lock(&tcp_app_lock); - list_for_each_entry(inc, &tcp_apps[hash], p_list) { + spin_lock(&ipvs->tcp_app_lock); + list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - spin_unlock(&tcp_app_lock); + spin_unlock(&ipvs->tcp_app_lock); IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -662,7 +658,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) goto out; } } - spin_unlock(&tcp_app_lock); + spin_unlock(&ipvs->tcp_app_lock); out: return result; @@ -672,24 +668,34 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) /* * Set LISTEN timeout. (ip_vs_conn_put will setup timer) */ -void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp) +void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp) { + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + spin_lock(&cp->lock); cp->state = IP_VS_TCP_S_LISTEN; - cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN]; + cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN] + : tcp_timeouts[IP_VS_TCP_S_LISTEN]); spin_unlock(&cp->lock); } - -static void ip_vs_tcp_init(struct ip_vs_protocol *pp) +/* --------------------------------------------- + * timeouts is netns related now. 
+ * --------------------------------------------- + */ +static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) { - IP_VS_INIT_HASH_TABLE(tcp_apps); - pp->timeout_table = tcp_timeouts; -} + struct netns_ipvs *ipvs = net_ipvs(net); + ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE); + spin_lock_init(&ipvs->tcp_app_lock); + pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts, + sizeof(tcp_timeouts)); +} -static void ip_vs_tcp_exit(struct ip_vs_protocol *pp) +static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd) { + kfree(pd->timeout_table); } @@ -699,8 +705,10 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { .num_states = IP_VS_TCP_S_LAST, .dont_defrag = 0, .appcnt = ATOMIC_INIT(0), - .init = ip_vs_tcp_init, - .exit = ip_vs_tcp_exit, + .init = NULL, + .exit = NULL, + .init_netns = __ip_vs_tcp_init, + .exit_netns = __ip_vs_tcp_exit, .register_app = tcp_register_app, .unregister_app = tcp_unregister_app, .conn_schedule = tcp_conn_schedule, @@ -714,5 +722,4 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { .app_conn_bind = tcp_app_conn_bind, .debug_packet = ip_vs_tcpudp_debug_packet, .timeout_change = tcp_timeout_change, - .set_state_timeout = tcp_set_state_timeout, }; -- cgit v1.1 From 78b16bde104cc74bedbf462b0ebed2990f35ff6b Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:48 +0100 Subject: IPVS: netns preparation for proto_udp In this phase (one), all local vars will be moved to ipvs struct. 
Remaining work, add param struct net *net to a couple of functions that is common for all protos and use ip_vs_proto_data *v3 Removed unused function set_state_timeout() Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_proto.c | 3 ++ net/netfilter/ipvs/ip_vs_proto_udp.c | 86 ++++++++++++++++++------------------ 2 files changed, 46 insertions(+), 43 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 320c6a6..cdc4142 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -310,6 +310,9 @@ static int __net_init __ip_vs_protocol_init(struct net *net) #ifdef CONFIG_IP_VS_PROTO_TCP register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp); #endif +#ifdef CONFIG_IP_VS_PROTO_UDP + register_ip_vs_proto_netns(net, &ip_vs_protocol_udp); +#endif return 0; } diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 5ab54f6..71a4721 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -9,7 +9,8 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Changes: + * Changes: Hans Schillstrom + * Network name space (netns) aware. * */ @@ -345,19 +346,6 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) return 1; } - -/* - * Note: the caller guarantees that only one of register_app, - * unregister_app or app_conn_bind is called each time. 
- */ - -#define UDP_APP_TAB_BITS 4 -#define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS) -#define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1) - -static struct list_head udp_apps[UDP_APP_TAB_SIZE]; -static DEFINE_SPINLOCK(udp_app_lock); - static inline __u16 udp_app_hashkey(__be16 port) { return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port) @@ -371,22 +359,24 @@ static int udp_register_app(struct ip_vs_app *inc) __u16 hash; __be16 port = inc->port; int ret = 0; + struct netns_ipvs *ipvs = net_ipvs(&init_net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_UDP); hash = udp_app_hashkey(port); - spin_lock_bh(&udp_app_lock); - list_for_each_entry(i, &udp_apps[hash], p_list) { + spin_lock_bh(&ipvs->udp_app_lock); + list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } - list_add(&inc->p_list, &udp_apps[hash]); - atomic_inc(&ip_vs_protocol_udp.appcnt); + list_add(&inc->p_list, &ipvs->udp_apps[hash]); + atomic_inc(&pd->pp->appcnt); out: - spin_unlock_bh(&udp_app_lock); + spin_unlock_bh(&ipvs->udp_app_lock); return ret; } @@ -394,15 +384,19 @@ static int udp_register_app(struct ip_vs_app *inc) static void udp_unregister_app(struct ip_vs_app *inc) { - spin_lock_bh(&udp_app_lock); - atomic_dec(&ip_vs_protocol_udp.appcnt); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_UDP); + struct netns_ipvs *ipvs = net_ipvs(&init_net); + + spin_lock_bh(&ipvs->udp_app_lock); + atomic_dec(&pd->pp->appcnt); list_del(&inc->p_list); - spin_unlock_bh(&udp_app_lock); + spin_unlock_bh(&ipvs->udp_app_lock); } static int udp_app_conn_bind(struct ip_vs_conn *cp) { + struct netns_ipvs *ipvs = net_ipvs(&init_net); int hash; struct ip_vs_app *inc; int result = 0; @@ -414,12 +408,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = udp_app_hashkey(cp->vport); - spin_lock(&udp_app_lock); - list_for_each_entry(inc, 
&udp_apps[hash], p_list) { + spin_lock(&ipvs->udp_app_lock); + list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - spin_unlock(&udp_app_lock); + spin_unlock(&ipvs->udp_app_lock); IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -436,14 +430,14 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) goto out; } } - spin_unlock(&udp_app_lock); + spin_unlock(&ipvs->udp_app_lock); out: return result; } -static int udp_timeouts[IP_VS_UDP_S_LAST+1] = { +static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = { [IP_VS_UDP_S_NORMAL] = 5*60*HZ, [IP_VS_UDP_S_LAST] = 2*HZ, }; @@ -453,14 +447,6 @@ static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = { [IP_VS_UDP_S_LAST] = "BUG!", }; - -static int -udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) -{ - return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST, - udp_state_name_table, sname, to); -} - static const char * udp_state_name(int state) { if (state >= IP_VS_UDP_S_LAST) @@ -473,18 +459,31 @@ udp_state_transition(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, struct ip_vs_protocol *pp) { - cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL]; + struct ip_vs_proto_data *pd; /* Temp fix, pp will be replaced by pd */ + + pd = ip_vs_proto_data_get(&init_net, IPPROTO_UDP); + if (unlikely(!pd)) { + pr_err("UDP no ns data\n"); + return 0; + } + + cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL]; return 1; } -static void udp_init(struct ip_vs_protocol *pp) +static void __udp_init(struct net *net, struct ip_vs_proto_data *pd) { - IP_VS_INIT_HASH_TABLE(udp_apps); - pp->timeout_table = udp_timeouts; + struct netns_ipvs *ipvs = net_ipvs(net); + + ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE); + spin_lock_init(&ipvs->udp_app_lock); + pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts, + sizeof(udp_timeouts)); } -static void 
udp_exit(struct ip_vs_protocol *pp) +static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd) { + kfree(pd->timeout_table); } @@ -493,8 +492,10 @@ struct ip_vs_protocol ip_vs_protocol_udp = { .protocol = IPPROTO_UDP, .num_states = IP_VS_UDP_S_LAST, .dont_defrag = 0, - .init = udp_init, - .exit = udp_exit, + .init = NULL, + .exit = NULL, + .init_netns = __udp_init, + .exit_netns = __udp_exit, .conn_schedule = udp_conn_schedule, .conn_in_get = ip_vs_conn_in_get_proto, .conn_out_get = ip_vs_conn_out_get_proto, @@ -508,5 +509,4 @@ struct ip_vs_protocol ip_vs_protocol_udp = { .app_conn_bind = udp_app_conn_bind, .debug_packet = ip_vs_tcpudp_debug_packet, .timeout_change = NULL, - .set_state_timeout = udp_set_state_timeout, }; -- cgit v1.1 From 9d934878e7870fbbbd8eaed2e467552536877def Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:49 +0100 Subject: IPVS: netns preparation for proto_sctp In this phase (one), all local vars will be moved to ipvs struct. Remaining work, add param struct net *net to a couple of functions that is common for all protos and use ip_vs_proto_data *v3 Removed unuset function set_state_timeout() Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_proto.c | 3 + net/netfilter/ipvs/ip_vs_proto_sctp.c | 121 ++++++++++++++++------------------ 2 files changed, 61 insertions(+), 63 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index cdc4142..001b2f8 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -313,6 +313,9 @@ static int __net_init __ip_vs_protocol_init(struct net *net) #ifdef CONFIG_IP_VS_PROTO_UDP register_ip_vs_proto_netns(net, &ip_vs_protocol_udp); #endif +#ifdef CONFIG_IP_VS_PROTO_SCTP + register_ip_vs_proto_netns(net, &ip_vs_protocol_sctp); +#endif return 0; } diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c 
b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 521b827..f826dd1 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -862,7 +862,7 @@ static struct ipvs_sctp_nextstate /* * Timeout table[state] */ -static int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = { +static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = { [IP_VS_SCTP_S_NONE] = 2 * HZ, [IP_VS_SCTP_S_INIT_CLI] = 1 * 60 * HZ, [IP_VS_SCTP_S_INIT_SER] = 1 * 60 * HZ, @@ -906,18 +906,6 @@ static const char *sctp_state_name(int state) return "?"; } -static void sctp_timeout_change(struct ip_vs_protocol *pp, int flags) -{ -} - -static int -sctp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) -{ - -return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_SCTP_S_LAST, - sctp_state_name_table, sname, to); -} - static inline int set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, int direction, const struct sk_buff *skb) @@ -926,6 +914,7 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, unsigned char chunk_type; int event, next_state; int ihl; + struct ip_vs_proto_data *pd; #ifdef CONFIG_IP_VS_IPV6 ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr); @@ -1001,10 +990,13 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, } } } + pd = ip_vs_proto_data_get(&init_net, pp->protocol); /* tmp fix */ + if (likely(pd)) + cp->timeout = pd->timeout_table[cp->state = next_state]; + else /* What to do ? 
*/ + cp->timeout = sctp_timeouts[cp->state = next_state]; - cp->timeout = pp->timeout_table[cp->state = next_state]; - - return 1; + return 1; } static int @@ -1020,16 +1012,6 @@ sctp_state_transition(struct ip_vs_conn *cp, int direction, return ret; } -/* - * Hash table for SCTP application incarnations - */ -#define SCTP_APP_TAB_BITS 4 -#define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS) -#define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1) - -static struct list_head sctp_apps[SCTP_APP_TAB_SIZE]; -static DEFINE_SPINLOCK(sctp_app_lock); - static inline __u16 sctp_app_hashkey(__be16 port) { return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port) @@ -1042,34 +1024,40 @@ static int sctp_register_app(struct ip_vs_app *inc) __u16 hash; __be16 port = inc->port; int ret = 0; + struct netns_ipvs *ipvs = net_ipvs(&init_net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_SCTP); hash = sctp_app_hashkey(port); - spin_lock_bh(&sctp_app_lock); - list_for_each_entry(i, &sctp_apps[hash], p_list) { + spin_lock_bh(&ipvs->sctp_app_lock); + list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } - list_add(&inc->p_list, &sctp_apps[hash]); - atomic_inc(&ip_vs_protocol_sctp.appcnt); + list_add(&inc->p_list, &ipvs->sctp_apps[hash]); + atomic_inc(&pd->pp->appcnt); out: - spin_unlock_bh(&sctp_app_lock); + spin_unlock_bh(&ipvs->sctp_app_lock); return ret; } static void sctp_unregister_app(struct ip_vs_app *inc) { - spin_lock_bh(&sctp_app_lock); - atomic_dec(&ip_vs_protocol_sctp.appcnt); + struct netns_ipvs *ipvs = net_ipvs(&init_net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_SCTP); + + spin_lock_bh(&ipvs->sctp_app_lock); + atomic_dec(&pd->pp->appcnt); list_del(&inc->p_list); - spin_unlock_bh(&sctp_app_lock); + spin_unlock_bh(&ipvs->sctp_app_lock); } static int sctp_app_conn_bind(struct ip_vs_conn *cp) { + struct netns_ipvs *ipvs = net_ipvs(&init_net); int hash; struct 
ip_vs_app *inc; int result = 0; @@ -1080,12 +1068,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = sctp_app_hashkey(cp->vport); - spin_lock(&sctp_app_lock); - list_for_each_entry(inc, &sctp_apps[hash], p_list) { + spin_lock(&ipvs->sctp_app_lock); + list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - spin_unlock(&sctp_app_lock); + spin_unlock(&ipvs->sctp_app_lock); IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -1101,43 +1089,50 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp) goto out; } } - spin_unlock(&sctp_app_lock); + spin_unlock(&ipvs->sctp_app_lock); out: return result; } -static void ip_vs_sctp_init(struct ip_vs_protocol *pp) +/* --------------------------------------------- + * timeouts is netns related now. + * --------------------------------------------- + */ +static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) { - IP_VS_INIT_HASH_TABLE(sctp_apps); - pp->timeout_table = sctp_timeouts; -} + struct netns_ipvs *ipvs = net_ipvs(net); + ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE); + spin_lock_init(&ipvs->tcp_app_lock); + pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, + sizeof(sctp_timeouts)); +} -static void ip_vs_sctp_exit(struct ip_vs_protocol *pp) +static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd) { - + kfree(pd->timeout_table); } struct ip_vs_protocol ip_vs_protocol_sctp = { - .name = "SCTP", - .protocol = IPPROTO_SCTP, - .num_states = IP_VS_SCTP_S_LAST, - .dont_defrag = 0, - .appcnt = ATOMIC_INIT(0), - .init = ip_vs_sctp_init, - .exit = ip_vs_sctp_exit, - .register_app = sctp_register_app, + .name = "SCTP", + .protocol = IPPROTO_SCTP, + .num_states = IP_VS_SCTP_S_LAST, + .dont_defrag = 0, + .init = NULL, + .exit = NULL, + .init_netns = __ip_vs_sctp_init, + .exit_netns = 
__ip_vs_sctp_exit, + .register_app = sctp_register_app, .unregister_app = sctp_unregister_app, - .conn_schedule = sctp_conn_schedule, - .conn_in_get = ip_vs_conn_in_get_proto, - .conn_out_get = ip_vs_conn_out_get_proto, - .snat_handler = sctp_snat_handler, - .dnat_handler = sctp_dnat_handler, - .csum_check = sctp_csum_check, - .state_name = sctp_state_name, + .conn_schedule = sctp_conn_schedule, + .conn_in_get = ip_vs_conn_in_get_proto, + .conn_out_get = ip_vs_conn_out_get_proto, + .snat_handler = sctp_snat_handler, + .dnat_handler = sctp_dnat_handler, + .csum_check = sctp_csum_check, + .state_name = sctp_state_name, .state_transition = sctp_state_transition, - .app_conn_bind = sctp_app_conn_bind, - .debug_packet = ip_vs_tcpudp_debug_packet, - .timeout_change = sctp_timeout_change, - .set_state_timeout = sctp_set_state_timeout, + .app_conn_bind = sctp_app_conn_bind, + .debug_packet = ip_vs_tcpudp_debug_packet, + .timeout_change = NULL, }; -- cgit v1.1 From 88fe2d372793a71ae4f6319a16f537d56a83906c Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:50 +0100 Subject: IPVS: netns preparation for proto_ah_esp In this phase (one), all local vars will be moved to ipvs struct. Remaining work, add param struct net *net to a couple of functions that common for all protos. 
Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_proto.c | 6 ++++++ net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 20 ++++---------------- 2 files changed, 10 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 001b2f8..9f609d4 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -316,6 +316,12 @@ static int __net_init __ip_vs_protocol_init(struct net *net) #ifdef CONFIG_IP_VS_PROTO_SCTP register_ip_vs_proto_netns(net, &ip_vs_protocol_sctp); #endif +#ifdef CONFIG_IP_VS_PROTO_AH + register_ip_vs_proto_netns(net, &ip_vs_protocol_ah); +#endif +#ifdef CONFIG_IP_VS_PROTO_ESP + register_ip_vs_proto_netns(net, &ip_vs_protocol_esp); +#endif return 0; } diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index 3a04611..b8b37fa 100644 --- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c @@ -117,26 +117,14 @@ ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, return 0; } -static void ah_esp_init(struct ip_vs_protocol *pp) -{ - /* nothing to do now */ -} - - -static void ah_esp_exit(struct ip_vs_protocol *pp) -{ - /* nothing to do now */ -} - - #ifdef CONFIG_IP_VS_PROTO_AH struct ip_vs_protocol ip_vs_protocol_ah = { .name = "AH", .protocol = IPPROTO_AH, .num_states = 1, .dont_defrag = 1, - .init = ah_esp_init, - .exit = ah_esp_exit, + .init = NULL, + .exit = NULL, .conn_schedule = ah_esp_conn_schedule, .conn_in_get = ah_esp_conn_in_get, .conn_out_get = ah_esp_conn_out_get, @@ -159,8 +147,8 @@ struct ip_vs_protocol ip_vs_protocol_esp = { .protocol = IPPROTO_ESP, .num_states = 1, .dont_defrag = 1, - .init = ah_esp_init, - .exit = ah_esp_exit, + .init = NULL, + .exit = NULL, .conn_schedule = ah_esp_conn_schedule, .conn_in_get = ah_esp_conn_in_get, .conn_out_get = ah_esp_conn_out_get, -- cgit 
v1.1 From 9330419d9aa4f97df412ac9be9fc0388c67dd315 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:51 +0100 Subject: IPVS: netns, use ip_vs_proto_data as param. ip_vs_protocol *pp is replaced by ip_vs_proto_data *pd in the function calls in the ip_vs_protocol struct, i.e.: - timeout_change() - state_transition() ip_vs_protocol_timeout_change() got ipvs as param, due to the above and an upcoming patch - defence work Most of these changes are triggered by Julian's comment: "tcp_timeout_change should work with the new struct ip_vs_proto_data so that tcp_state_table will go to pd->state_table and set_tcp_state will get pd instead of pp" *v3 Mostly comments from Julian The pp -> pd conversion should start from functions like ip_vs_out() that use pp = ip_vs_proto_get(iph.protocol), now they should use ip_vs_proto_data_get(net, iph.protocol). conn_in_get() and conn_out_get() unused param *pp, removed. *v4 ip_vs_protocol_timeout_change() walks the proto_data path. Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_conn.c | 2 - net/netfilter/ipvs/ip_vs_core.c | 77 ++++++++++++++++++++------------- net/netfilter/ipvs/ip_vs_ctl.c | 55 ++++++++++++++--------- net/netfilter/ipvs/ip_vs_proto.c | 21 ++++++--- net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 10 ++--- net/netfilter/ipvs/ip_vs_proto_sctp.c | 16 +++---- net/netfilter/ipvs/ip_vs_proto_tcp.c | 27 +++++------- net/netfilter/ipvs/ip_vs_proto_udp.c | 11 ++--- net/netfilter/xt_ipvs.c | 2 +- 9 files changed, 123 insertions(+), 98 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 7a0e79e..a7aba6a 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -329,7 +329,6 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, - struct ip_vs_protocol *pp, const struct ip_vs_iphdr 
*iph, unsigned int proto_off, int inverse) { @@ -428,7 +427,6 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, - struct ip_vs_protocol *pp, const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse) { diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index d0616ea..9317aff 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -177,11 +177,11 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) static inline int ip_vs_set_state(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, - struct ip_vs_protocol *pp) + struct ip_vs_proto_data *pd) { - if (unlikely(!pp->state_transition)) + if (unlikely(!pd->pp->state_transition)) return 0; - return pp->state_transition(cp, direction, skb, pp); + return pd->pp->state_transition(cp, direction, skb, pd); } static inline int @@ -378,8 +378,9 @@ ip_vs_sched_persist(struct ip_vs_service *svc, */ struct ip_vs_conn * ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, - struct ip_vs_protocol *pp, int *ignored) + struct ip_vs_proto_data *pd, int *ignored) { + struct ip_vs_protocol *pp = pd->pp; struct ip_vs_conn *cp = NULL; struct ip_vs_iphdr iph; struct ip_vs_dest *dest; @@ -408,7 +409,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, * Do not schedule replies from local real server. */ if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && - (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) { + (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) { IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, "Not scheduling reply for existing connection"); __ip_vs_conn_put(cp); @@ -479,11 +480,12 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, * no destination is available for a new connection. 
*/ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, - struct ip_vs_protocol *pp) + struct ip_vs_proto_data *pd) { __be16 _ports[2], *pptr; struct ip_vs_iphdr iph; int unicast; + ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); @@ -530,10 +532,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, ip_vs_in_stats(cp, skb); /* set state */ - cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp); + cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); /* transmit the first SYN packet */ - ret = cp->packet_xmit(skb, cp, pp); + ret = cp->packet_xmit(skb, cp, pd->pp); /* do not touch skb anymore */ atomic_inc(&cp->in_pkts); @@ -840,7 +842,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, ip_vs_fill_iphdr(AF_INET, cih, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1); + cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1); if (!cp) return NF_ACCEPT; @@ -917,7 +919,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, ip_vs_fill_iphdr(AF_INET6, cih, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1); + cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1); if (!cp) return NF_ACCEPT; @@ -956,9 +958,11 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len) * Used for NAT and local client. 
*/ static unsigned int -handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, +handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, int ihl) { + struct ip_vs_protocol *pp = pd->pp; + IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); if (!skb_make_writable(skb, ihl)) @@ -1007,7 +1011,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); ip_vs_out_stats(cp, skb); - ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); + ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd); skb->ipvs_property = 1; if (!(cp->flags & IP_VS_CONN_F_NFCT)) ip_vs_notrack(skb); @@ -1034,6 +1038,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) struct net *net = NULL; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; + struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; EnterFunction(11); @@ -1079,9 +1084,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } - pp = ip_vs_proto_get(iph.protocol); - if (unlikely(!pp)) + pd = ip_vs_proto_data_get(net, iph.protocol); + if (unlikely(!pd)) return NF_ACCEPT; + pp = pd->pp; /* reassemble IP fragments */ #ifdef CONFIG_IP_VS_IPV6 @@ -1107,10 +1113,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) /* * Check if the packet belongs to an existing entry */ - cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0); + cp = pp->conn_out_get(af, skb, &iph, iph.len, 0); if (likely(cp)) - return handle_response(af, skb, pp, cp, iph.len); + return handle_response(af, skb, pd, cp, iph.len); if (sysctl_ip_vs_nat_icmp_send && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP || @@ -1236,12 +1242,14 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb, static int ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) { + struct net *net = NULL; struct iphdr *iph; struct icmphdr _icmph, *ic; struct iphdr _ciph, 
*cih; /* The ip header contained within the ICMP */ struct ip_vs_iphdr ciph; struct ip_vs_conn *cp; struct ip_vs_protocol *pp; + struct ip_vs_proto_data *pd; unsigned int offset, ihl, verdict; union nf_inet_addr snet; @@ -1283,9 +1291,11 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) if (cih == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ - pp = ip_vs_proto_get(cih->protocol); - if (!pp) + net = skb_net(skb); + pd = ip_vs_proto_data_get(net, cih->protocol); + if (!pd) return NF_ACCEPT; + pp = pd->pp; /* Is the embedded protocol header present? */ if (unlikely(cih->frag_off & htons(IP_OFFSET) && @@ -1299,10 +1309,10 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) ip_vs_fill_iphdr(AF_INET, cih, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1); + cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1); if (!cp) { /* The packet could also belong to a local client */ - cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1); + cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1); if (cp) { snet.ip = iph->saddr; return handle_response_icmp(AF_INET, skb, &snet, @@ -1346,6 +1356,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) { + struct net *net = NULL; struct ipv6hdr *iph; struct icmp6hdr _icmph, *ic; struct ipv6hdr _ciph, *cih; /* The ip header contained @@ -1353,6 +1364,7 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) struct ip_vs_iphdr ciph; struct ip_vs_conn *cp; struct ip_vs_protocol *pp; + struct ip_vs_proto_data *pd; unsigned int offset, verdict; union nf_inet_addr snet; struct rt6_info *rt; @@ -1395,9 +1407,11 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) if (cih == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ - pp = 
ip_vs_proto_get(cih->nexthdr); - if (!pp) + net = skb_net(skb); + pd = ip_vs_proto_data_get(net, cih->nexthdr); + if (!pd) return NF_ACCEPT; + pp = pd->pp; /* Is the embedded protocol header present? */ /* TODO: we don't support fragmentation at the moment anyways */ @@ -1411,10 +1425,10 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) ip_vs_fill_iphdr(AF_INET6, cih, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1); + cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1); if (!cp) { /* The packet could also belong to a local client */ - cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1); + cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1); if (cp) { ipv6_addr_copy(&snet.in6, &iph->saddr); return handle_response_icmp(AF_INET6, skb, &snet, @@ -1457,8 +1471,10 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) static unsigned int ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) { + struct net *net = NULL; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; + struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; int ret, restart, pkts; @@ -1514,20 +1530,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } + net = skb_net(skb); /* Protocol supported? 
*/ - pp = ip_vs_proto_get(iph.protocol); - if (unlikely(!pp)) + pd = ip_vs_proto_data_get(net, iph.protocol); + if (unlikely(!pd)) return NF_ACCEPT; - + pp = pd->pp; /* * Check if the packet belongs to an existing connection entry */ - cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0); + cp = pp->conn_in_get(af, skb, &iph, iph.len, 0); if (unlikely(!cp)) { int v; - if (!pp->conn_schedule(af, skb, pp, &v, &cp)) + if (!pp->conn_schedule(af, skb, pd, &v, &cp)) return v; } @@ -1555,7 +1572,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } ip_vs_in_stats(cp, skb); - restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp); + restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); if (cp->packet_xmit) ret = cp->packet_xmit(skb, cp, pp); /* do not touch skb anymore */ diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 2d7c96b..88474f1 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -38,6 +38,7 @@ #include #include +#include #include #ifdef CONFIG_IP_VS_IPV6 #include @@ -125,7 +126,7 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr) * update_defense_level is called from keventd and from sysctl, * so it needs to protect itself from softirqs */ -static void update_defense_level(void) +static void update_defense_level(struct netns_ipvs *ipvs) { struct sysinfo i; static int old_secure_tcp = 0; @@ -239,7 +240,8 @@ static void update_defense_level(void) } old_secure_tcp = sysctl_ip_vs_secure_tcp; if (to_change >= 0) - ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1); + ip_vs_protocol_timeout_change(ipvs, + sysctl_ip_vs_secure_tcp > 1); spin_unlock(&ip_vs_securetcp_lock); local_bh_enable(); @@ -255,7 +257,10 @@ static DECLARE_DELAYED_WORK(defense_work, defense_work_handler); static void defense_work_handler(struct work_struct *work) { - update_defense_level(); + struct net *net = &init_net; + struct netns_ipvs *ipvs = net_ipvs(net); + + update_defense_level(ipvs); if 
(atomic_read(&ip_vs_dropentry)) ip_vs_random_dropentry(); @@ -1502,6 +1507,7 @@ static int proc_do_defense_mode(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct net *net = current->nsproxy->net_ns; int *valp = table->data; int val = *valp; int rc; @@ -1512,7 +1518,7 @@ proc_do_defense_mode(ctl_table *table, int write, /* Restore the correct value */ *valp = val; } else { - update_defense_level(); + update_defense_level(net_ipvs(net)); } } return rc; @@ -2033,8 +2039,10 @@ static const struct file_operations ip_vs_stats_fops = { /* * Set timeout values for tcp tcpfin udp in the timeout_table. */ -static int ip_vs_set_timeout(struct ip_vs_timeout_user *u) +static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u) { + struct ip_vs_proto_data *pd; + IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n", u->tcp_timeout, u->tcp_fin_timeout, @@ -2042,19 +2050,22 @@ static int ip_vs_set_timeout(struct ip_vs_timeout_user *u) #ifdef CONFIG_IP_VS_PROTO_TCP if (u->tcp_timeout) { - ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] + pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] = u->tcp_timeout * HZ; } if (u->tcp_fin_timeout) { - ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] + pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] = u->tcp_fin_timeout * HZ; } #endif #ifdef CONFIG_IP_VS_PROTO_UDP if (u->udp_timeout) { - ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] + pd = ip_vs_proto_data_get(net, IPPROTO_UDP); + pd->timeout_table[IP_VS_UDP_S_NORMAL] = u->udp_timeout * HZ; } #endif @@ -2158,7 +2169,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) goto out_unlock; } else if (cmd == IP_VS_SO_SET_TIMEOUT) { /* Set timeout values for (tcp tcpfin udp) */ - ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg); + ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg); goto 
out_unlock; } else if (cmd == IP_VS_SO_SET_STARTDAEMON) { struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; @@ -2370,17 +2381,19 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, } static inline void -__ip_vs_get_timeouts(struct ip_vs_timeout_user *u) +__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u) { + struct ip_vs_proto_data *pd; + #ifdef CONFIG_IP_VS_PROTO_TCP - u->tcp_timeout = - ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; - u->tcp_fin_timeout = - ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ; + pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; + u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ; #endif #ifdef CONFIG_IP_VS_PROTO_UDP + pd = ip_vs_proto_data_get(net, IPPROTO_UDP); u->udp_timeout = - ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ; + pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ; #endif } @@ -2521,7 +2534,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { struct ip_vs_timeout_user t; - __ip_vs_get_timeouts(&t); + __ip_vs_get_timeouts(net, &t); if (copy_to_user(user, &t, sizeof(t)) != 0) ret = -EFAULT; } @@ -3092,11 +3105,11 @@ static int ip_vs_genl_del_daemon(struct nlattr **attrs) return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); } -static int ip_vs_genl_set_config(struct nlattr **attrs) +static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) { struct ip_vs_timeout_user t; - __ip_vs_get_timeouts(&t); + __ip_vs_get_timeouts(net, &t); if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]) t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]); @@ -3108,7 +3121,7 @@ static int ip_vs_genl_set_config(struct nlattr **attrs) if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]) t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]); - return ip_vs_set_timeout(&t); + return ip_vs_set_timeout(net, &t); } static int 
ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) @@ -3129,7 +3142,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) ret = ip_vs_flush(net); goto out; } else if (cmd == IPVS_CMD_SET_CONFIG) { - ret = ip_vs_genl_set_config(info->attrs); + ret = ip_vs_genl_set_config(net, info->attrs); goto out; } else if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) { @@ -3281,7 +3294,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) { struct ip_vs_timeout_user t; - __ip_vs_get_timeouts(&t); + __ip_vs_get_timeouts(net, &t); #ifdef CONFIG_IP_VS_PROTO_TCP NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout); NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 9f609d4..6ac986c 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -152,9 +152,8 @@ EXPORT_SYMBOL(ip_vs_proto_get); * get ip_vs_protocol object data by netns and proto */ struct ip_vs_proto_data * -ip_vs_proto_data_get(struct net *net, unsigned short proto) +__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data *pd; unsigned hash = IP_VS_PROTO_HASH(proto); @@ -165,20 +164,28 @@ ip_vs_proto_data_get(struct net *net, unsigned short proto) return NULL; } + +struct ip_vs_proto_data * +ip_vs_proto_data_get(struct net *net, unsigned short proto) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + + return __ipvs_proto_data_get(ipvs, proto); +} EXPORT_SYMBOL(ip_vs_proto_data_get); /* * Propagate event for state change to all protocols */ -void ip_vs_protocol_timeout_change(int flags) +void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags) { - struct ip_vs_protocol *pp; + struct ip_vs_proto_data *pd; int i; for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { - for (pp = ip_vs_proto_table[i]; pp; pp = pp->next) { - if (pp->timeout_change) - 
pp->timeout_change(pp, flags); + for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) { + if (pd->pp->timeout_change) + pd->pp->timeout_change(pd, flags); } } } diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index b8b37fa..28039cb 100644 --- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c @@ -55,7 +55,7 @@ ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph, } static struct ip_vs_conn * -ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, +ah_esp_conn_in_get(int af, const struct sk_buff *skb, const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse) { @@ -72,7 +72,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet " "%s%s %s->%s\n", inverse ? "ICMP+" : "", - pp->name, + ip_vs_proto_get(iph->protocol)->name, IP_VS_DBG_ADDR(af, &iph->saddr), IP_VS_DBG_ADDR(af, &iph->daddr)); } @@ -83,7 +83,6 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, static struct ip_vs_conn * ah_esp_conn_out_get(int af, const struct sk_buff *skb, - struct ip_vs_protocol *pp, const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse) @@ -97,7 +96,7 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb, IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " "%s%s %s->%s\n", inverse ? 
"ICMP+" : "", - pp->name, + ip_vs_proto_get(iph->protocol)->name, IP_VS_DBG_ADDR(af, &iph->saddr), IP_VS_DBG_ADDR(af, &iph->daddr)); } @@ -107,7 +106,7 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb, static int -ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, +ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp) { /* @@ -137,7 +136,6 @@ struct ip_vs_protocol ip_vs_protocol_ah = { .app_conn_bind = NULL, .debug_packet = ip_vs_tcpudp_debug_packet, .timeout_change = NULL, /* ISAKMP */ - .set_state_timeout = NULL, }; #endif diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index f826dd1..19bc379 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -9,7 +9,7 @@ #include static int -sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, +sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp) { struct net *net; @@ -47,10 +47,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, * Let the virtual server select a real server for the * incoming connection, and create a connection entry. 
*/ - *cpp = ip_vs_schedule(svc, skb, pp, &ignored); + *cpp = ip_vs_schedule(svc, skb, pd, &ignored); if (!*cpp && ignored <= 0) { if (!ignored) - *verdict = ip_vs_leave(svc, skb, pp); + *verdict = ip_vs_leave(svc, skb, pd); else { ip_vs_service_put(svc); *verdict = NF_DROP; @@ -907,14 +907,13 @@ static const char *sctp_state_name(int state) } static inline int -set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, +set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, int direction, const struct sk_buff *skb) { sctp_chunkhdr_t _sctpch, *sch; unsigned char chunk_type; int event, next_state; int ihl; - struct ip_vs_proto_data *pd; #ifdef CONFIG_IP_VS_IPV6 ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr); @@ -966,7 +965,7 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, IP_VS_DBG_BUF(8, "%s %s %s:%d->" "%s:%d state: %s->%s conn->refcnt:%d\n", - pp->name, + pd->pp->name, ((direction == IP_VS_DIR_OUTPUT) ? "output " : "input "), IP_VS_DBG_ADDR(cp->af, &cp->daddr), @@ -990,7 +989,6 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, } } } - pd = ip_vs_proto_data_get(&init_net, pp->protocol); /* tmp fix */ if (likely(pd)) cp->timeout = pd->timeout_table[cp->state = next_state]; else /* What to do ? 
*/ @@ -1001,12 +999,12 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, static int sctp_state_transition(struct ip_vs_conn *cp, int direction, - const struct sk_buff *skb, struct ip_vs_protocol *pp) + const struct sk_buff *skb, struct ip_vs_proto_data *pd) { int ret = 0; spin_lock(&cp->lock); - ret = set_sctp_state(pp, cp, direction, skb); + ret = set_sctp_state(pd, cp, direction, skb); spin_unlock(&cp->lock); return ret; diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 9d9df3d..d7c2455 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -32,7 +32,7 @@ #include static int -tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, +tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp) { struct net *net; @@ -68,10 +68,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, * Let the virtual server select a real server for the * incoming connection, and create a connection entry. */ - *cpp = ip_vs_schedule(svc, skb, pp, &ignored); + *cpp = ip_vs_schedule(svc, skb, pd, &ignored); if (!*cpp && ignored <= 0) { if (!ignored) - *verdict = ip_vs_leave(svc, skb, pp); + *verdict = ip_vs_leave(svc, skb, pd); else { ip_vs_service_put(svc); *verdict = NF_DROP; @@ -448,10 +448,7 @@ static struct tcp_states_t tcp_states_dos [] = { /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, }; -static struct tcp_states_t *tcp_state_table = tcp_states; - - -static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags) +static void tcp_timeout_change(struct ip_vs_proto_data *pd, int flags) { int on = (flags & 1); /* secure_tcp */ @@ -461,7 +458,7 @@ static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags) ** for most if not for all of the applications. Something ** like "capabilities" (flags) for each object. */ - tcp_state_table = (on? 
tcp_states_dos : tcp_states); + pd->tcp_state_table = (on ? tcp_states_dos : tcp_states); } static inline int tcp_state_idx(struct tcphdr *th) @@ -478,13 +475,12 @@ static inline int tcp_state_idx(struct tcphdr *th) } static inline void -set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, +set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, int direction, struct tcphdr *th) { int state_idx; int new_state = IP_VS_TCP_S_CLOSE; int state_off = tcp_state_off[direction]; - struct ip_vs_proto_data *pd; /* Temp fix */ /* * Update state offset to INPUT_ONLY if necessary @@ -502,7 +498,8 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, goto tcp_state_out; } - new_state = tcp_state_table[state_off+state_idx].next_state[cp->state]; + new_state = + pd->tcp_state_table[state_off+state_idx].next_state[cp->state]; tcp_state_out: if (new_state != cp->state) { @@ -510,7 +507,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->" "%s:%d state: %s->%s conn->refcnt:%d\n", - pp->name, + pd->pp->name, ((state_off == TCP_DIR_OUTPUT) ? "output " : "input "), th->syn ? 'S' : '.', @@ -540,7 +537,6 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, } } - pd = ip_vs_proto_data_get(&init_net, pp->protocol); if (likely(pd)) cp->timeout = pd->timeout_table[cp->state = new_state]; else /* What to do ? 
*/ @@ -553,7 +549,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, static int tcp_state_transition(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, - struct ip_vs_protocol *pp) + struct ip_vs_proto_data *pd) { struct tcphdr _tcph, *th; @@ -568,7 +564,7 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction, return 0; spin_lock(&cp->lock); - set_tcp_state(pp, cp, direction, th); + set_tcp_state(pd, cp, direction, th); spin_unlock(&cp->lock); return 1; @@ -691,6 +687,7 @@ static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) spin_lock_init(&ipvs->tcp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts, sizeof(tcp_timeouts)); + pd->tcp_state_table = tcp_states; } static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd) diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 71a4721..aa85df2 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -29,7 +29,7 @@ #include static int -udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, +udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp) { struct net *net; @@ -64,10 +64,10 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, * Let the virtual server select a real server for the * incoming connection, and create a connection entry. 
*/ - *cpp = ip_vs_schedule(svc, skb, pp, &ignored); + *cpp = ip_vs_schedule(svc, skb, pd, &ignored); if (!*cpp && ignored <= 0) { if (!ignored) - *verdict = ip_vs_leave(svc, skb, pp); + *verdict = ip_vs_leave(svc, skb, pd); else { ip_vs_service_put(svc); *verdict = NF_DROP; @@ -457,11 +457,8 @@ static const char * udp_state_name(int state) static int udp_state_transition(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, - struct ip_vs_protocol *pp) + struct ip_vs_proto_data *pd) { - struct ip_vs_proto_data *pd; /* Temp fix, pp will be replaced by pd */ - - pd = ip_vs_proto_data_get(&init_net, IPPROTO_UDP); if (unlikely(!pd)) { pr_err("UDP no ns data\n"); return 0; diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c index 9127a3d..bb10b07 100644 --- a/net/netfilter/xt_ipvs.c +++ b/net/netfilter/xt_ipvs.c @@ -85,7 +85,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) /* * Check if the packet belongs to an existing entry */ - cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */); + cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */); if (unlikely(cp == NULL)) { match = false; goto out; -- cgit v1.1 From 9bbac6a904d0816dae58b454692c54d6773cc20d Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:52 +0100 Subject: IPVS: netns, common protocol changes and use of appcnt. appcnt and timeout_table moved from struct ip_vs_protocol to ip_vs proto_data. 
struct net *net added as first param to - register_app() - unregister_app() - app_conn_bind() - ip_vs_conn_new() [horms@verge.net.au: removed cosmetic-change-only hunk] Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_conn.c | 6 ++-- net/netfilter/ipvs/ip_vs_proto_sctp.c | 4 +-- net/netfilter/ipvs/ip_vs_proto_tcp.c | 5 ++-- net/netfilter/ipvs/ip_vs_proto_udp.c | 4 +-- net/netfilter/ipvs/ip_vs_sync.c | 55 +++++++++++++++++++---------------- 5 files changed, 39 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index a7aba6a..b2024c9 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -804,7 +804,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, struct ip_vs_dest *dest, __u32 fwmark) { struct ip_vs_conn *cp; - struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, p->protocol); cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); if (cp == NULL) { @@ -863,8 +863,8 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, #endif ip_vs_bind_xmit(cp); - if (unlikely(pp && atomic_read(&pp->appcnt))) - ip_vs_bind_app(cp, pp); + if (unlikely(pd && atomic_read(&pd->appcnt))) + ip_vs_bind_app(cp, pd->pp); /* * Allow conntrack to be preserved. 
By default, conntrack diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 19bc379..0f14f79 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -1035,7 +1035,7 @@ static int sctp_register_app(struct ip_vs_app *inc) } } list_add(&inc->p_list, &ipvs->sctp_apps[hash]); - atomic_inc(&pd->pp->appcnt); + atomic_inc(&pd->appcnt); out: spin_unlock_bh(&ipvs->sctp_app_lock); @@ -1048,7 +1048,7 @@ static void sctp_unregister_app(struct ip_vs_app *inc) struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_SCTP); spin_lock_bh(&ipvs->sctp_app_lock); - atomic_dec(&pd->pp->appcnt); + atomic_dec(&pd->appcnt); list_del(&inc->p_list); spin_unlock_bh(&ipvs->sctp_app_lock); } diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index d7c2455..290b380 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -596,7 +596,7 @@ static int tcp_register_app(struct ip_vs_app *inc) } } list_add(&inc->p_list, &ipvs->tcp_apps[hash]); - atomic_inc(&pd->pp->appcnt); + atomic_inc(&pd->appcnt); out: spin_unlock_bh(&ipvs->tcp_app_lock); @@ -611,7 +611,7 @@ tcp_unregister_app(struct ip_vs_app *inc) struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP); spin_lock_bh(&ipvs->tcp_app_lock); - atomic_dec(&pd->pp->appcnt); + atomic_dec(&pd->appcnt); list_del(&inc->p_list); spin_unlock_bh(&ipvs->tcp_app_lock); } @@ -701,7 +701,6 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { .protocol = IPPROTO_TCP, .num_states = IP_VS_TCP_S_LAST, .dont_defrag = 0, - .appcnt = ATOMIC_INIT(0), .init = NULL, .exit = NULL, .init_netns = __ip_vs_tcp_init, diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index aa85df2..3719837 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -373,7 +373,7 @@ static int udp_register_app(struct ip_vs_app *inc) } } 
list_add(&inc->p_list, &ipvs->udp_apps[hash]); - atomic_inc(&pd->pp->appcnt); + atomic_inc(&pd->appcnt); out: spin_unlock_bh(&ipvs->udp_app_lock); @@ -388,7 +388,7 @@ udp_unregister_app(struct ip_vs_app *inc) struct netns_ipvs *ipvs = net_ipvs(&init_net); spin_lock_bh(&ipvs->udp_app_lock); - atomic_dec(&pd->pp->appcnt); + atomic_dec(&pd->appcnt); list_del(&inc->p_list); spin_unlock_bh(&ipvs->udp_app_lock); } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 662aa2c..6831e8f 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -725,17 +725,16 @@ ip_vs_conn_fill_param_sync(int af, union ip_vs_sync_conn *sc, * Param: ... * timeout is in sec. */ -static void ip_vs_proc_conn(struct ip_vs_conn_param *param, unsigned flags, - unsigned state, unsigned protocol, unsigned type, +static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, + unsigned int flags, unsigned int state, + unsigned int protocol, unsigned int type, const union nf_inet_addr *daddr, __be16 dport, unsigned long timeout, __u32 fwmark, - struct ip_vs_sync_conn_options *opt, - struct ip_vs_protocol *pp) + struct ip_vs_sync_conn_options *opt) { struct ip_vs_dest *dest; struct ip_vs_conn *cp; - if (!(flags & IP_VS_CONN_F_TEMPLATE)) cp = ip_vs_conn_in_get(param); else @@ -821,17 +820,23 @@ static void ip_vs_proc_conn(struct ip_vs_conn_param *param, unsigned flags, if (timeout > MAX_SCHEDULE_TIMEOUT / HZ) timeout = MAX_SCHEDULE_TIMEOUT / HZ; cp->timeout = timeout*HZ; - } else if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table) - cp->timeout = pp->timeout_table[state]; - else - cp->timeout = (3*60*HZ); + } else { + struct ip_vs_proto_data *pd; + + pd = ip_vs_proto_data_get(net, protocol); + if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table) + cp->timeout = pd->timeout_table[state]; + else + cp->timeout = (3*60*HZ); + } ip_vs_conn_put(cp); } /* * Process received multicast message for Version 0 */ -static 
void ip_vs_process_message_v0(const char *buffer, const size_t buflen) +static void ip_vs_process_message_v0(struct net *net, const char *buffer, + const size_t buflen) { struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer; struct ip_vs_sync_conn_v0 *s; @@ -879,7 +884,6 @@ static void ip_vs_process_message_v0(const char *buffer, const size_t buflen) } } else { /* protocol in templates is not used for state/timeout */ - pp = NULL; if (state > 0) { IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n", state); @@ -894,9 +898,9 @@ static void ip_vs_process_message_v0(const char *buffer, const size_t buflen) s->vport, &param); /* Send timeout as Zero */ - ip_vs_proc_conn(&param, flags, state, s->protocol, AF_INET, + ip_vs_proc_conn(net, &param, flags, state, s->protocol, AF_INET, (union nf_inet_addr *)&s->daddr, s->dport, - 0, 0, opt, pp); + 0, 0, opt); } } @@ -945,7 +949,7 @@ static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len, /* * Process a Version 1 sync. connection */ -static inline int ip_vs_proc_sync_conn(__u8 *p, __u8 *msg_end) +static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end) { struct ip_vs_sync_conn_options opt; union ip_vs_sync_conn *s; @@ -1043,7 +1047,6 @@ static inline int ip_vs_proc_sync_conn(__u8 *p, __u8 *msg_end) } } else { /* protocol in templates is not used for state/timeout */ - pp = NULL; if (state > 0) { IP_VS_DBG(3, "BACKUP, Invalid template state %u\n", state); @@ -1058,18 +1061,18 @@ static inline int ip_vs_proc_sync_conn(__u8 *p, __u8 *msg_end) } /* If only IPv4, just silent skip IPv6 */ if (af == AF_INET) - ip_vs_proc_conn(&param, flags, state, s->v4.protocol, af, + ip_vs_proc_conn(net, &param, flags, state, s->v4.protocol, af, (union nf_inet_addr *)&s->v4.daddr, s->v4.dport, ntohl(s->v4.timeout), ntohl(s->v4.fwmark), - (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL), - pp); + (opt_flags & IPVS_OPT_F_SEQ_DATA ?
&opt : NULL) + ); #ifdef CONFIG_IP_VS_IPV6 else - ip_vs_proc_conn(&param, flags, state, s->v6.protocol, af, + ip_vs_proc_conn(net, &param, flags, state, s->v6.protocol, af, (union nf_inet_addr *)&s->v6.daddr, s->v6.dport, ntohl(s->v6.timeout), ntohl(s->v6.fwmark), - (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL), - pp); + (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) + ); #endif return 0; /* Error exit */ @@ -1083,7 +1086,8 @@ out: * ip_vs_conn entries. * Handles Version 0 & 1 */ -static void ip_vs_process_message(__u8 *buffer, const size_t buflen) +static void ip_vs_process_message(struct net *net, __u8 *buffer, + const size_t buflen) { struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer; __u8 *p, *msg_end; @@ -1136,7 +1140,8 @@ static void ip_vs_process_message(__u8 *buffer, const size_t buflen) return; } /* Process a single sync_conn */ - if ((retc=ip_vs_proc_sync_conn(p, msg_end)) < 0) { + retc = ip_vs_proc_sync_conn(net, p, msg_end); + if (retc < 0) { IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n", retc); return; @@ -1146,7 +1151,7 @@ static void ip_vs_process_message(__u8 *buffer, const size_t buflen) } } else { /* Old type of message */ - ip_vs_process_message_v0(buffer, buflen); + ip_vs_process_message_v0(net, buffer, buflen); return; } } @@ -1500,7 +1505,7 @@ static int sync_thread_backup(void *data) /* disable bottom half, because it accesses the data shared by softirq while getting/creating conns */ local_bh_disable(); - ip_vs_process_message(tinfo->buf, len); + ip_vs_process_message(&init_net, tinfo->buf, len); local_bh_enable(); } } -- cgit v1.1 From ab8a5e8408c3df2d654611bffc3aaf04f418b266 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:53 +0100 Subject: IPVS: netns awareness to ip_vs_app All variables moved to struct ipvs, most external changes fixed (i.e.
init_net removed) in ip_vs_protocol param struct net *net added to: - register_app() - unregister_app() This affected almost all proto_xxx.c files Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_app.c | 73 +++++++++++++++++++++-------------- net/netfilter/ipvs/ip_vs_ftp.c | 8 ++-- net/netfilter/ipvs/ip_vs_proto_sctp.c | 12 +++--- net/netfilter/ipvs/ip_vs_proto_tcp.c | 12 +++--- net/netfilter/ipvs/ip_vs_proto_udp.c | 12 +++--- 5 files changed, 65 insertions(+), 52 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 40b09cc..286f465 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -43,11 +43,6 @@ EXPORT_SYMBOL(register_ip_vs_app); EXPORT_SYMBOL(unregister_ip_vs_app); EXPORT_SYMBOL(register_ip_vs_app_inc); -/* ipvs application list head */ -static LIST_HEAD(ip_vs_app_list); -static DEFINE_MUTEX(__ip_vs_app_mutex); - - /* * Get an ip_vs_app object */ @@ -67,7 +62,8 @@ static inline void ip_vs_app_put(struct ip_vs_app *app) * Allocate/initialize app incarnation and register it in proto apps. 
*/ static int -ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port) +ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto, + __u16 port) { struct ip_vs_protocol *pp; struct ip_vs_app *inc; @@ -98,7 +94,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port) } } - ret = pp->register_app(inc); + ret = pp->register_app(net, inc); if (ret) goto out; @@ -119,7 +115,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port) * Release app incarnation */ static void -ip_vs_app_inc_release(struct ip_vs_app *inc) +ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc) { struct ip_vs_protocol *pp; @@ -127,7 +123,7 @@ ip_vs_app_inc_release(struct ip_vs_app *inc) return; if (pp->unregister_app) - pp->unregister_app(inc); + pp->unregister_app(net, inc); IP_VS_DBG(9, "%s App %s:%u unregistered\n", pp->name, inc->name, ntohs(inc->port)); @@ -168,15 +164,17 @@ void ip_vs_app_inc_put(struct ip_vs_app *inc) * Register an application incarnation in protocol applications */ int -register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port) +register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto, + __u16 port) { + struct netns_ipvs *ipvs = net_ipvs(net); int result; - mutex_lock(&__ip_vs_app_mutex); + mutex_lock(&ipvs->app_mutex); - result = ip_vs_app_inc_new(app, proto, port); + result = ip_vs_app_inc_new(net, app, proto, port); - mutex_unlock(&__ip_vs_app_mutex); + mutex_unlock(&ipvs->app_mutex); return result; } @@ -185,16 +183,17 @@ register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port) /* * ip_vs_app registration routine */ -int register_ip_vs_app(struct ip_vs_app *app) +int register_ip_vs_app(struct net *net, struct ip_vs_app *app) { + struct netns_ipvs *ipvs = net_ipvs(net); /* increase the module use count */ ip_vs_use_count_inc(); - mutex_lock(&__ip_vs_app_mutex); + mutex_lock(&ipvs->app_mutex); - list_add(&app->a_list, &ip_vs_app_list); + list_add(&app->a_list, 
&ipvs->app_list); - mutex_unlock(&__ip_vs_app_mutex); + mutex_unlock(&ipvs->app_mutex); return 0; } @@ -204,19 +203,20 @@ int register_ip_vs_app(struct ip_vs_app *app) * ip_vs_app unregistration routine * We are sure there are no app incarnations attached to services */ -void unregister_ip_vs_app(struct ip_vs_app *app) +void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) { + struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_app *inc, *nxt; - mutex_lock(&__ip_vs_app_mutex); + mutex_lock(&ipvs->app_mutex); list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) { - ip_vs_app_inc_release(inc); + ip_vs_app_inc_release(net, inc); } list_del(&app->a_list); - mutex_unlock(&__ip_vs_app_mutex); + mutex_unlock(&ipvs->app_mutex); /* decrease the module use count */ ip_vs_use_count_dec(); @@ -226,7 +226,8 @@ void unregister_ip_vs_app(struct ip_vs_app *app) /* * Bind ip_vs_conn to its ip_vs_app (called by cp constructor) */ -int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp) +int ip_vs_bind_app(struct ip_vs_conn *cp, + struct ip_vs_protocol *pp) { return pp->app_conn_bind(cp); } @@ -481,11 +482,11 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb) * /proc/net/ip_vs_app entry function */ -static struct ip_vs_app *ip_vs_app_idx(loff_t pos) +static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos) { struct ip_vs_app *app, *inc; - list_for_each_entry(app, &ip_vs_app_list, a_list) { + list_for_each_entry(app, &ipvs->app_list, a_list) { list_for_each_entry(inc, &app->incs_list, a_list) { if (pos-- == 0) return inc; @@ -497,19 +498,24 @@ static struct ip_vs_app *ip_vs_app_idx(loff_t pos) static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos) { - mutex_lock(&__ip_vs_app_mutex); + struct net *net = seq_file_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); + + mutex_lock(&ipvs->app_mutex); - return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN; + return *pos ? 
ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN; } static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ip_vs_app *inc, *app; struct list_head *e; + struct net *net = seq_file_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); ++*pos; if (v == SEQ_START_TOKEN) - return ip_vs_app_idx(0); + return ip_vs_app_idx(ipvs, 0); inc = v; app = inc->app; @@ -518,7 +524,7 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) return list_entry(e, struct ip_vs_app, a_list); /* go on to next application */ - for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) { + for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) { app = list_entry(e, struct ip_vs_app, a_list); list_for_each_entry(inc, &app->incs_list, a_list) { return inc; @@ -529,7 +535,9 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void ip_vs_app_seq_stop(struct seq_file *seq, void *v) { - mutex_unlock(&__ip_vs_app_mutex); + struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq)); + + mutex_unlock(&ipvs->app_mutex); } static int ip_vs_app_seq_show(struct seq_file *seq, void *v) @@ -557,7 +565,8 @@ static const struct seq_operations ip_vs_app_seq_ops = { static int ip_vs_app_open(struct inode *inode, struct file *file) { - return seq_open(file, &ip_vs_app_seq_ops); + return seq_open_net(inode, file, &ip_vs_app_seq_ops, + sizeof(struct seq_net_private)); } static const struct file_operations ip_vs_app_fops = { @@ -571,9 +580,13 @@ static const struct file_operations ip_vs_app_fops = { static int __net_init __ip_vs_app_init(struct net *net) { + struct netns_ipvs *ipvs = net_ipvs(net); + if (!net_eq(net, &init_net)) /* netns not enabled yet */ return -EPERM; + INIT_LIST_HEAD(&ipvs->app_list); + __mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key); proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops); return 0; } diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 
b38ae94..77b0036 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -414,14 +414,14 @@ static int __net_init __ip_vs_ftp_init(struct net *net) if (!net_eq(net, &init_net)) /* netns not enabled yet */ return -EPERM; - ret = register_ip_vs_app(app); + ret = register_ip_vs_app(net, app); if (ret) return ret; for (i=0; i<IP_VS_APP_MAX_PORTS; i++) { if (!ports[i]) continue; - ret = register_ip_vs_app_inc(app, app->protocol, ports[i]); + ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]); if (ret) break; pr_info("%s: loaded support on port[%d] = %d\n", @@ -429,7 +429,7 @@ static int __net_init __ip_vs_ftp_init(struct net *net) } if (ret) - unregister_ip_vs_app(app); + unregister_ip_vs_app(net, app); return ret; } @@ -443,7 +443,7 @@ static void __ip_vs_ftp_exit(struct net *net) if (!net_eq(net, &init_net)) /* netns not enabled yet */ return; - unregister_ip_vs_app(app); + unregister_ip_vs_app(net, app); } static struct pernet_operations ip_vs_ftp_ops = { diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 0f14f79..569e77b 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -1016,14 +1016,14 @@ static inline __u16 sctp_app_hashkey(__be16 port) & SCTP_APP_TAB_MASK; } -static int sctp_register_app(struct ip_vs_app *inc) +static int sctp_register_app(struct net *net, struct ip_vs_app *inc) { struct ip_vs_app *i; __u16 hash; __be16 port = inc->port; int ret = 0; - struct netns_ipvs *ipvs = net_ipvs(&init_net); - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_SCTP); + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); hash = sctp_app_hashkey(port); @@ -1042,10 +1042,10 @@ out: return ret; } -static void sctp_unregister_app(struct ip_vs_app *inc) +static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) { - struct netns_ipvs *ipvs = net_ipvs(&init_net); - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_SCTP); +
struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); spin_lock_bh(&ipvs->sctp_app_lock); atomic_dec(&pd->appcnt); diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 290b380..757aaaf 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -577,14 +577,14 @@ static inline __u16 tcp_app_hashkey(__be16 port) } -static int tcp_register_app(struct ip_vs_app *inc) +static int tcp_register_app(struct net *net, struct ip_vs_app *inc) { struct ip_vs_app *i; __u16 hash; __be16 port = inc->port; int ret = 0; - struct netns_ipvs *ipvs = net_ipvs(&init_net); - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP); + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); hash = tcp_app_hashkey(port); @@ -605,10 +605,10 @@ static int tcp_register_app(struct ip_vs_app *inc) static void -tcp_unregister_app(struct ip_vs_app *inc) +tcp_unregister_app(struct net *net, struct ip_vs_app *inc) { - struct netns_ipvs *ipvs = net_ipvs(&init_net); - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP); + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); spin_lock_bh(&ipvs->tcp_app_lock); atomic_dec(&pd->appcnt); diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 3719837..1dc3941 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -353,14 +353,14 @@ static inline __u16 udp_app_hashkey(__be16 port) } -static int udp_register_app(struct ip_vs_app *inc) +static int udp_register_app(struct net *net, struct ip_vs_app *inc) { struct ip_vs_app *i; __u16 hash; __be16 port = inc->port; int ret = 0; - struct netns_ipvs *ipvs = net_ipvs(&init_net); - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, 
IPPROTO_UDP); + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); hash = udp_app_hashkey(port); @@ -382,10 +382,10 @@ static int udp_register_app(struct ip_vs_app *inc) static void -udp_unregister_app(struct ip_vs_app *inc) +udp_unregister_app(struct net *net, struct ip_vs_app *inc) { - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_UDP); - struct netns_ipvs *ipvs = net_ipvs(&init_net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); + struct netns_ipvs *ipvs = net_ipvs(net); spin_lock_bh(&ipvs->udp_app_lock); atomic_dec(&pd->appcnt); -- cgit v1.1 From 29c2026fd4980c144d9c746dc1565060f08e5796 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:54 +0100 Subject: IPVS: netns awareness to ip_vs_est All variables moved to struct ipvs, most external changes fixed (i.e. init_net removed) *v3 timer per ns instead of a common timer in estimator. Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ctl.c | 20 +++++----- net/netfilter/ipvs/ip_vs_est.c | 86 +++++++++++++++++++++++------------------- 2 files changed, 58 insertions(+), 48 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 88474f1..c89beb8 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -816,7 +816,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, spin_unlock(&dest->dst_lock); if (add) - ip_vs_new_estimator(&dest->stats); + ip_vs_new_estimator(svc->net, &dest->stats); write_lock_bh(&__ip_vs_svc_lock); @@ -1009,9 +1009,9 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) /* * Delete a destination (must be already unlinked from the service) */ -static void __ip_vs_del_dest(struct ip_vs_dest *dest) +static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest 
*dest) { - ip_vs_kill_estimator(&dest->stats); + ip_vs_kill_estimator(net, &dest->stats); /* * Remove it from the d-linked list with the real services. @@ -1080,6 +1080,7 @@ static int ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) { struct ip_vs_dest *dest; + struct net *net = svc->net; __be16 dport = udest->port; EnterFunction(2); @@ -1108,7 +1109,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) /* * Delete the destination */ - __ip_vs_del_dest(dest); + __ip_vs_del_dest(net, dest); LeaveFunction(2); @@ -1197,7 +1198,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, else if (svc->port == 0) atomic_inc(&ip_vs_nullsvc_counter); - ip_vs_new_estimator(&svc->stats); + ip_vs_new_estimator(net, &svc->stats); /* Count only IPv4 services for old get/setsockopt interface */ if (svc->af == AF_INET) @@ -1345,7 +1346,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) if (svc->af == AF_INET) ip_vs_num_services--; - ip_vs_kill_estimator(&svc->stats); + ip_vs_kill_estimator(svc->net, &svc->stats); /* Unbind scheduler */ old_sched = svc->scheduler; @@ -1368,7 +1369,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) */ list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { __ip_vs_unlink_dest(svc, dest, 0); - __ip_vs_del_dest(dest); + __ip_vs_del_dest(svc->net, dest); } /* @@ -3460,7 +3461,7 @@ int __net_init __ip_vs_control_init(struct net *net) vs_vars); if (sysctl_header == NULL) goto err_reg; - ip_vs_new_estimator(&ip_vs_stats); + ip_vs_new_estimator(net, &ip_vs_stats); return 0; err_reg: @@ -3472,7 +3473,7 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net) if (!net_eq(net, &init_net)) /* netns not enabled yet */ return; - ip_vs_kill_estimator(&ip_vs_stats); + ip_vs_kill_estimator(net, &ip_vs_stats); unregister_net_sysctl_table(sysctl_header); proc_net_remove(net, "ip_vs_stats"); proc_net_remove(net, "ip_vs"); @@ -3536,7 +3537,6 @@ void 
ip_vs_control_cleanup(void) ip_vs_trash_cleanup(); cancel_delayed_work_sync(&defense_work); cancel_work_sync(&defense_work.work); - ip_vs_kill_estimator(&ip_vs_stats); unregister_pernet_subsys(&ipvs_control_ops); ip_vs_genl_unregister(); nf_unregister_sockopt(&ip_vs_sockopts); diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 7417a0c..07d839b 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -8,8 +8,12 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Changes: - * + * Changes: Hans Schillstrom + * Network name space (netns) aware. + * Global data moved to netns i.e struct netns_ipvs + * Affected data: est_list and est_lock. + * estimation_timer() runs with timer per netns. + * get_stats()) do the per cpu summing. */ #define KMSG_COMPONENT "IPVS" @@ -48,12 +52,6 @@ */ -static void estimation_timer(unsigned long arg); - -static LIST_HEAD(est_list); -static DEFINE_SPINLOCK(est_lock); -static DEFINE_TIMER(est_timer, estimation_timer, 0, 0); - static void estimation_timer(unsigned long arg) { struct ip_vs_estimator *e; @@ -62,9 +60,12 @@ static void estimation_timer(unsigned long arg) u32 n_inpkts, n_outpkts; u64 n_inbytes, n_outbytes; u32 rate; + struct net *net = (struct net *)arg; + struct netns_ipvs *ipvs; - spin_lock(&est_lock); - list_for_each_entry(e, &est_list, list) { + ipvs = net_ipvs(net); + spin_lock(&ipvs->est_lock); + list_for_each_entry(e, &ipvs->est_list, list) { s = container_of(e, struct ip_vs_stats, est); spin_lock(&s->lock); @@ -75,38 +76,39 @@ static void estimation_timer(unsigned long arg) n_outbytes = s->ustats.outbytes; /* scaled by 2^10, but divided 2 seconds */ - rate = (n_conns - e->last_conns)<<9; + rate = (n_conns - e->last_conns) << 9; e->last_conns = n_conns; - e->cps += ((long)rate - (long)e->cps)>>2; - s->ustats.cps = (e->cps+0x1FF)>>10; + e->cps += ((long)rate - (long)e->cps) >> 2; + 
s->ustats.cps = (e->cps + 0x1FF) >> 10; - rate = (n_inpkts - e->last_inpkts)<<9; + rate = (n_inpkts - e->last_inpkts) << 9; e->last_inpkts = n_inpkts; - e->inpps += ((long)rate - (long)e->inpps)>>2; - s->ustats.inpps = (e->inpps+0x1FF)>>10; + e->inpps += ((long)rate - (long)e->inpps) >> 2; + s->ustats.inpps = (e->inpps + 0x1FF) >> 10; - rate = (n_outpkts - e->last_outpkts)<<9; + rate = (n_outpkts - e->last_outpkts) << 9; e->last_outpkts = n_outpkts; - e->outpps += ((long)rate - (long)e->outpps)>>2; - s->ustats.outpps = (e->outpps+0x1FF)>>10; + e->outpps += ((long)rate - (long)e->outpps) >> 2; + s->ustats.outpps = (e->outpps + 0x1FF) >> 10; - rate = (n_inbytes - e->last_inbytes)<<4; + rate = (n_inbytes - e->last_inbytes) << 4; e->last_inbytes = n_inbytes; - e->inbps += ((long)rate - (long)e->inbps)>>2; - s->ustats.inbps = (e->inbps+0xF)>>5; + e->inbps += ((long)rate - (long)e->inbps) >> 2; + s->ustats.inbps = (e->inbps + 0xF) >> 5; - rate = (n_outbytes - e->last_outbytes)<<4; + rate = (n_outbytes - e->last_outbytes) << 4; e->last_outbytes = n_outbytes; - e->outbps += ((long)rate - (long)e->outbps)>>2; - s->ustats.outbps = (e->outbps+0xF)>>5; + e->outbps += ((long)rate - (long)e->outbps) >> 2; + s->ustats.outbps = (e->outbps + 0xF) >> 5; spin_unlock(&s->lock); } - spin_unlock(&est_lock); - mod_timer(&est_timer, jiffies + 2*HZ); + spin_unlock(&ipvs->est_lock); + mod_timer(&ipvs->est_timer, jiffies + 2*HZ); } -void ip_vs_new_estimator(struct ip_vs_stats *stats) +void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats) { + struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_estimator *est = &stats->est; INIT_LIST_HEAD(&est->list); @@ -126,18 +128,19 @@ void ip_vs_new_estimator(struct ip_vs_stats *stats) est->last_outbytes = stats->ustats.outbytes; est->outbps = stats->ustats.outbps<<5; - spin_lock_bh(&est_lock); - list_add(&est->list, &est_list); - spin_unlock_bh(&est_lock); + spin_lock_bh(&ipvs->est_lock); + list_add(&est->list, &ipvs->est_list); + 
spin_unlock_bh(&ipvs->est_lock); } -void ip_vs_kill_estimator(struct ip_vs_stats *stats) +void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats) { + struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_estimator *est = &stats->est; - spin_lock_bh(&est_lock); + spin_lock_bh(&ipvs->est_lock); list_del(&est->list); - spin_unlock_bh(&est_lock); + spin_unlock_bh(&ipvs->est_lock); } void ip_vs_zero_estimator(struct ip_vs_stats *stats) @@ -159,14 +162,25 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats) static int __net_init __ip_vs_estimator_init(struct net *net) { + struct netns_ipvs *ipvs = net_ipvs(net); + if (!net_eq(net, &init_net)) /* netns not enabled yet */ return -EPERM; + INIT_LIST_HEAD(&ipvs->est_list); + spin_lock_init(&ipvs->est_lock); + setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net); + mod_timer(&ipvs->est_timer, jiffies + 2 * HZ); return 0; } +static void __net_exit __ip_vs_estimator_exit(struct net *net) +{ + del_timer_sync(&net_ipvs(net)->est_timer); +} static struct pernet_operations ip_vs_app_ops = { .init = __ip_vs_estimator_init, + .exit = __ip_vs_estimator_exit, }; int __init ip_vs_estimator_init(void) @@ -174,14 +188,10 @@ int __init ip_vs_estimator_init(void) int rv; rv = register_pernet_subsys(&ip_vs_app_ops); - if (rv < 0) - return rv; - mod_timer(&est_timer, jiffies + 2 * HZ); return rv; } void ip_vs_estimator_cleanup(void) { - del_timer_sync(&est_timer); unregister_pernet_subsys(&ip_vs_app_ops); } -- cgit v1.1 From f131315fa272d337dfca7dad2f033ff5296dad65 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:55 +0100 Subject: IPVS: netns awareness to ip_vs_sync All global variables moved to struct ipvs, most external changes fixed (i.e. init_net removed) in sync_buf create + 4 replaced by sizeof(struct..) 
Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 15 +- net/netfilter/ipvs/ip_vs_ctl.c | 52 ++++--- net/netfilter/ipvs/ip_vs_sync.c | 334 +++++++++++++++++++++------------------- 3 files changed, 219 insertions(+), 182 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 9317aff..5531d56 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1471,12 +1471,13 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) static unsigned int ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) { - struct net *net = NULL; + struct net *net; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; int ret, restart, pkts; + struct netns_ipvs *ipvs; /* Already marked as IPVS request or reply? */ if (skb->ipvs_property) @@ -1556,7 +1557,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); - + net = skb_net(skb); + ipvs = net_ipvs(net); /* Check the server status */ if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ @@ -1589,12 +1591,13 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) * * For ONE_PKT let ip_vs_sync_conn() do the filter work. 
*/ + if (cp->flags & IP_VS_CONN_F_ONE_PACKET) pkts = sysctl_ip_vs_sync_threshold[0]; else pkts = atomic_add_return(1, &cp->in_pkts); - if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && + if ((ipvs->sync_state & IP_VS_STATE_MASTER) && cp->protocol == IPPROTO_SCTP) { if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && (pkts % sysctl_ip_vs_sync_threshold[1] @@ -1603,13 +1606,13 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) ((cp->state == IP_VS_SCTP_S_CLOSED) || (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) || (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) { - ip_vs_sync_conn(cp); + ip_vs_sync_conn(net, cp); goto out; } } /* Keep this block last: TCP and others with pp->num_states <= 1 */ - else if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && + else if ((ipvs->sync_state & IP_VS_STATE_MASTER) && (((cp->protocol != IPPROTO_TCP || cp->state == IP_VS_TCP_S_ESTABLISHED) && (pkts % sysctl_ip_vs_sync_threshold[1] @@ -1619,7 +1622,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) (cp->state == IP_VS_TCP_S_CLOSE) || (cp->state == IP_VS_TCP_S_CLOSE_WAIT) || (cp->state == IP_VS_TCP_S_TIME_WAIT))))) - ip_vs_sync_conn(cp); + ip_vs_sync_conn(net, cp); out: cp->old_state = cp->state; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c89beb8..03f8631 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1559,7 +1559,8 @@ proc_do_sync_mode(ctl_table *table, int write, /* Restore the correct value */ *valp = val; } else { - ip_vs_sync_switch_mode(val); + struct net *net = current->nsproxy->net_ns; + ip_vs_sync_switch_mode(net, val); } } return rc; @@ -2174,11 +2175,12 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) goto out_unlock; } else if (cmd == IP_VS_SO_SET_STARTDAEMON) { struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; - ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid); + ret = start_sync_thread(net, dm->state, dm->mcast_ifn, + dm->syncid); 
goto out_unlock; } else if (cmd == IP_VS_SO_SET_STOPDAEMON) { struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; - ret = stop_sync_thread(dm->state); + ret = stop_sync_thread(net, dm->state); goto out_unlock; } @@ -2424,6 +2426,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) int ret = 0; unsigned int copylen; struct net *net = sock_net(sk); + struct netns_ipvs *ipvs = net_ipvs(net); BUG_ON(!net); if (!capable(CAP_NET_ADMIN)) @@ -2546,15 +2549,17 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) struct ip_vs_daemon_user d[2]; memset(&d, 0, sizeof(d)); - if (ip_vs_sync_state & IP_VS_STATE_MASTER) { + if (ipvs->sync_state & IP_VS_STATE_MASTER) { d[0].state = IP_VS_STATE_MASTER; - strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn)); - d[0].syncid = ip_vs_master_syncid; + strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn, + sizeof(d[0].mcast_ifn)); + d[0].syncid = ipvs->master_syncid; } - if (ip_vs_sync_state & IP_VS_STATE_BACKUP) { + if (ipvs->sync_state & IP_VS_STATE_BACKUP) { d[1].state = IP_VS_STATE_BACKUP; - strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn)); - d[1].syncid = ip_vs_backup_syncid; + strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn, + sizeof(d[1].mcast_ifn)); + d[1].syncid = ipvs->backup_syncid; } if (copy_to_user(user, &d, sizeof(d)) != 0) ret = -EFAULT; @@ -3061,20 +3066,23 @@ nla_put_failure: static int ip_vs_genl_dump_daemons(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb_net(skb); + struct netns_ipvs *ipvs = net_ipvs(net); + mutex_lock(&__ip_vs_mutex); - if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { + if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, - ip_vs_master_mcast_ifn, - ip_vs_master_syncid, cb) < 0) + ipvs->master_mcast_ifn, + ipvs->master_syncid, cb) < 0) goto nla_put_failure; cb->args[0] = 1; } - if ((ip_vs_sync_state & 
IP_VS_STATE_BACKUP) && !cb->args[1]) { + if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP, - ip_vs_backup_mcast_ifn, - ip_vs_backup_syncid, cb) < 0) + ipvs->backup_mcast_ifn, + ipvs->backup_syncid, cb) < 0) goto nla_put_failure; cb->args[1] = 1; @@ -3086,24 +3094,26 @@ nla_put_failure: return skb->len; } -static int ip_vs_genl_new_daemon(struct nlattr **attrs) +static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs) { if (!(attrs[IPVS_DAEMON_ATTR_STATE] && attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && attrs[IPVS_DAEMON_ATTR_SYNC_ID])) return -EINVAL; - return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), + return start_sync_thread(net, + nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID])); } -static int ip_vs_genl_del_daemon(struct nlattr **attrs) +static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs) { if (!attrs[IPVS_DAEMON_ATTR_STATE]) return -EINVAL; - return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); + return stop_sync_thread(net, + nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); } static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) @@ -3159,9 +3169,9 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) } if (cmd == IPVS_CMD_NEW_DAEMON) - ret = ip_vs_genl_new_daemon(daemon_attrs); + ret = ip_vs_genl_new_daemon(net, daemon_attrs); else - ret = ip_vs_genl_del_daemon(daemon_attrs); + ret = ip_vs_genl_del_daemon(net, daemon_attrs); goto out; } else if (cmd == IPVS_CMD_ZERO && !info->attrs[IPVS_CMD_ATTR_SERVICE]) { diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 6831e8f..c29e73d 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -192,6 +192,7 @@ union ip_vs_sync_conn { #define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1)) struct 
ip_vs_sync_thread_data { + struct net *net; struct socket *sock; char *buf; }; @@ -259,10 +260,6 @@ struct ip_vs_sync_mesg { /* ip_vs_sync_conn entries start here */ }; -/* the maximum length of sync (sending/receiving) message */ -static int sync_send_mesg_maxlen; -static int sync_recv_mesg_maxlen; - struct ip_vs_sync_buff { struct list_head list; unsigned long firstuse; @@ -273,28 +270,6 @@ struct ip_vs_sync_buff { unsigned char *end; }; - -/* the sync_buff list head and the lock */ -static LIST_HEAD(ip_vs_sync_queue); -static DEFINE_SPINLOCK(ip_vs_sync_lock); - -/* current sync_buff for accepting new conn entries */ -static struct ip_vs_sync_buff *curr_sb = NULL; -static DEFINE_SPINLOCK(curr_sb_lock); - -/* ipvs sync daemon state */ -volatile int ip_vs_sync_state = IP_VS_STATE_NONE; -volatile int ip_vs_master_syncid = 0; -volatile int ip_vs_backup_syncid = 0; - -/* multicast interface name */ -char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; -char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; - -/* sync daemon tasks */ -static struct task_struct *sync_master_thread; -static struct task_struct *sync_backup_thread; - /* multicast addr */ static struct sockaddr_in mcast_addr = { .sin_family = AF_INET, @@ -324,20 +299,20 @@ static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no) put_unaligned_be32(ho->previous_delta, &no->previous_delta); } -static inline struct ip_vs_sync_buff *sb_dequeue(void) +static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs) { struct ip_vs_sync_buff *sb; - spin_lock_bh(&ip_vs_sync_lock); - if (list_empty(&ip_vs_sync_queue)) { + spin_lock_bh(&ipvs->sync_lock); + if (list_empty(&ipvs->sync_queue)) { sb = NULL; } else { - sb = list_entry(ip_vs_sync_queue.next, + sb = list_entry(ipvs->sync_queue.next, struct ip_vs_sync_buff, list); list_del(&sb->list); } - spin_unlock_bh(&ip_vs_sync_lock); + spin_unlock_bh(&ipvs->sync_lock); return sb; } @@ -345,25 +320,27 @@ static inline struct ip_vs_sync_buff *sb_dequeue(void) /* 
* Create a new sync buffer for Version 1 proto. */ -static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void) +static inline struct ip_vs_sync_buff * +ip_vs_sync_buff_create(struct netns_ipvs *ipvs) { struct ip_vs_sync_buff *sb; if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) return NULL; - if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) { + sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC); + if (!sb->mesg) { kfree(sb); return NULL; } sb->mesg->reserved = 0; /* old nr_conns i.e. must be zeo now */ sb->mesg->version = SYNC_PROTO_VER; - sb->mesg->syncid = ip_vs_master_syncid; + sb->mesg->syncid = ipvs->master_syncid; sb->mesg->size = sizeof(struct ip_vs_sync_mesg); sb->mesg->nr_conns = 0; sb->mesg->spare = 0; sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg); - sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen; + sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen; sb->firstuse = jiffies; return sb; @@ -375,14 +352,16 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb) kfree(sb); } -static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) +static inline void sb_queue_tail(struct netns_ipvs *ipvs) { - spin_lock(&ip_vs_sync_lock); - if (ip_vs_sync_state & IP_VS_STATE_MASTER) - list_add_tail(&sb->list, &ip_vs_sync_queue); + struct ip_vs_sync_buff *sb = ipvs->sync_buff; + + spin_lock(&ipvs->sync_lock); + if (ipvs->sync_state & IP_VS_STATE_MASTER) + list_add_tail(&sb->list, &ipvs->sync_queue); else ip_vs_sync_buff_release(sb); - spin_unlock(&ip_vs_sync_lock); + spin_unlock(&ipvs->sync_lock); } /* @@ -390,18 +369,18 @@ static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) * than the specified time or the specified time is zero. 
*/ static inline struct ip_vs_sync_buff * -get_curr_sync_buff(unsigned long time) +get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time) { struct ip_vs_sync_buff *sb; - spin_lock_bh(&curr_sb_lock); - if (curr_sb && (time == 0 || - time_before(jiffies - curr_sb->firstuse, time))) { - sb = curr_sb; - curr_sb = NULL; + spin_lock_bh(&ipvs->sync_buff_lock); + if (ipvs->sync_buff && (time == 0 || + time_before(jiffies - ipvs->sync_buff->firstuse, time))) { + sb = ipvs->sync_buff; + ipvs->sync_buff = NULL; } else sb = NULL; - spin_unlock_bh(&curr_sb_lock); + spin_unlock_bh(&ipvs->sync_buff_lock); return sb; } @@ -409,33 +388,37 @@ get_curr_sync_buff(unsigned long time) * Switch mode from sending version 0 or 1 * - must handle sync_buf */ -void ip_vs_sync_switch_mode(int mode) { +void ip_vs_sync_switch_mode(struct net *net, int mode) +{ + struct netns_ipvs *ipvs = net_ipvs(net); - if (!ip_vs_sync_state & IP_VS_STATE_MASTER) + if (!ipvs->sync_state & IP_VS_STATE_MASTER) return; - if (mode == sysctl_ip_vs_sync_ver || !curr_sb) + if (mode == sysctl_ip_vs_sync_ver || !ipvs->sync_buff) return; - spin_lock_bh(&curr_sb_lock); + spin_lock_bh(&ipvs->sync_buff_lock); /* Buffer empty ? 
then let buf_create do the job */ - if ( curr_sb->mesg->size <= sizeof(struct ip_vs_sync_mesg)) { - kfree(curr_sb); - curr_sb = NULL; + if (ipvs->sync_buff->mesg->size <= sizeof(struct ip_vs_sync_mesg)) { + kfree(ipvs->sync_buff); + ipvs->sync_buff = NULL; } else { - spin_lock_bh(&ip_vs_sync_lock); - if (ip_vs_sync_state & IP_VS_STATE_MASTER) - list_add_tail(&curr_sb->list, &ip_vs_sync_queue); + spin_lock_bh(&ipvs->sync_lock); + if (ipvs->sync_state & IP_VS_STATE_MASTER) + list_add_tail(&ipvs->sync_buff->list, + &ipvs->sync_queue); else - ip_vs_sync_buff_release(curr_sb); - spin_unlock_bh(&ip_vs_sync_lock); + ip_vs_sync_buff_release(ipvs->sync_buff); + spin_unlock_bh(&ipvs->sync_lock); } - spin_unlock_bh(&curr_sb_lock); + spin_unlock_bh(&ipvs->sync_buff_lock); } /* * Create a new sync buffer for Version 0 proto. */ -static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create_v0(void) +static inline struct ip_vs_sync_buff * +ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs) { struct ip_vs_sync_buff *sb; struct ip_vs_sync_mesg_v0 *mesg; @@ -443,16 +426,17 @@ static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create_v0(void) if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) return NULL; - if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) { + sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC); + if (!sb->mesg) { kfree(sb); return NULL; } mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg; mesg->nr_conns = 0; - mesg->syncid = ip_vs_master_syncid; - mesg->size = 4; - sb->head = (unsigned char *)mesg + 4; - sb->end = (unsigned char *)mesg + sync_send_mesg_maxlen; + mesg->syncid = ipvs->master_syncid; + mesg->size = sizeof(struct ip_vs_sync_mesg_v0); + sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0); + sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen; sb->firstuse = jiffies; return sb; } @@ -461,8 +445,9 @@ static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create_v0(void) * Version 0 , could be switched in by 
sys_ctl. * Add an ip_vs_conn information into the current sync_buff. */ -void ip_vs_sync_conn_v0(struct ip_vs_conn *cp) +void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp) { + struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_sync_mesg_v0 *m; struct ip_vs_sync_conn_v0 *s; int len; @@ -473,10 +458,12 @@ void ip_vs_sync_conn_v0(struct ip_vs_conn *cp) if (cp->flags & IP_VS_CONN_F_ONE_PACKET) return; - spin_lock(&curr_sb_lock); - if (!curr_sb) { - if (!(curr_sb=ip_vs_sync_buff_create_v0())) { - spin_unlock(&curr_sb_lock); + spin_lock(&ipvs->sync_buff_lock); + if (!ipvs->sync_buff) { + ipvs->sync_buff = + ip_vs_sync_buff_create_v0(ipvs); + if (!ipvs->sync_buff) { + spin_unlock(&ipvs->sync_buff_lock); pr_err("ip_vs_sync_buff_create failed.\n"); return; } @@ -484,8 +471,8 @@ void ip_vs_sync_conn_v0(struct ip_vs_conn *cp) len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : SIMPLE_CONN_SIZE; - m = (struct ip_vs_sync_mesg_v0 *)curr_sb->mesg; - s = (struct ip_vs_sync_conn_v0 *)curr_sb->head; + m = (struct ip_vs_sync_mesg_v0 *)ipvs->sync_buff->mesg; + s = (struct ip_vs_sync_conn_v0 *)ipvs->sync_buff->head; /* copy members */ s->reserved = 0; @@ -506,18 +493,18 @@ void ip_vs_sync_conn_v0(struct ip_vs_conn *cp) m->nr_conns++; m->size += len; - curr_sb->head += len; + ipvs->sync_buff->head += len; /* check if there is a space for next one */ - if (curr_sb->head + FULL_CONN_SIZE > curr_sb->end) { - sb_queue_tail(curr_sb); - curr_sb = NULL; + if (ipvs->sync_buff->head + FULL_CONN_SIZE > ipvs->sync_buff->end) { + sb_queue_tail(ipvs); + ipvs->sync_buff = NULL; } - spin_unlock(&curr_sb_lock); + spin_unlock(&ipvs->sync_buff_lock); /* synchronize its controller if it has */ if (cp->control) - ip_vs_sync_conn(cp->control); + ip_vs_sync_conn(net, cp->control); } /* @@ -525,8 +512,9 @@ void ip_vs_sync_conn_v0(struct ip_vs_conn *cp) * Called by ip_vs_in. 
* Sending Version 1 messages */ -void ip_vs_sync_conn(struct ip_vs_conn *cp) +void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp) { + struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_sync_mesg *m; union ip_vs_sync_conn *s; __u8 *p; @@ -534,7 +522,7 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp) /* Handle old version of the protocol */ if (sysctl_ip_vs_sync_ver == 0) { - ip_vs_sync_conn_v0(cp); + ip_vs_sync_conn_v0(net, cp); return; } /* Do not sync ONE PACKET */ @@ -551,7 +539,7 @@ sloop: pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN); } - spin_lock(&curr_sb_lock); + spin_lock(&ipvs->sync_buff_lock); #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) @@ -570,26 +558,27 @@ sloop: /* check if there is a space for this one */ pad = 0; - if (curr_sb) { - pad = (4 - (size_t)curr_sb->head) & 3; - if (curr_sb->head + len + pad > curr_sb->end) { - sb_queue_tail(curr_sb); - curr_sb = NULL; + if (ipvs->sync_buff) { + pad = (4 - (size_t)ipvs->sync_buff->head) & 3; + if (ipvs->sync_buff->head + len + pad > ipvs->sync_buff->end) { + sb_queue_tail(ipvs); + ipvs->sync_buff = NULL; pad = 0; } } - if (!curr_sb) { - if (!(curr_sb=ip_vs_sync_buff_create())) { - spin_unlock(&curr_sb_lock); + if (!ipvs->sync_buff) { + ipvs->sync_buff = ip_vs_sync_buff_create(ipvs); + if (!ipvs->sync_buff) { + spin_unlock(&ipvs->sync_buff_lock); pr_err("ip_vs_sync_buff_create failed.\n"); return; } } - m = curr_sb->mesg; - p = curr_sb->head; - curr_sb->head += pad + len; + m = ipvs->sync_buff->mesg; + p = ipvs->sync_buff->head; + ipvs->sync_buff->head += pad + len; m->size += pad + len; /* Add ev. padding from prev. 
sync_conn */ while (pad--) @@ -647,7 +636,7 @@ sloop: } } - spin_unlock(&curr_sb_lock); + spin_unlock(&ipvs->sync_buff_lock); control: /* synchronize its controller if it has */ @@ -699,7 +688,8 @@ ip_vs_conn_fill_param_sync(int af, union ip_vs_sync_conn *sc, buff[pe_name_len]=0; p->pe = __ip_vs_pe_getbyname(buff); if (!p->pe) { - IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n", buff); + IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n", + buff); return 1; } } else { @@ -748,7 +738,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, * If it is not found the connection will remain unbound * but still handled. */ - dest = ip_vs_find_dest(&init_net, type, daddr, dport, param->vaddr, + dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr, param->vport, protocol, fwmark); /* Set the approprite ativity flag */ @@ -1089,6 +1079,7 @@ out: static void ip_vs_process_message(struct net *net, __u8 *buffer, const size_t buflen) { + struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer; __u8 *p, *msg_end; int i, nr_conns; @@ -1105,7 +1096,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer, return; } /* SyncID sanity check */ - if (ip_vs_backup_syncid != 0 && m2->syncid != ip_vs_backup_syncid) { + if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) { IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid); return; } @@ -1190,8 +1181,10 @@ static int set_mcast_if(struct sock *sk, char *ifname) { struct net_device *dev; struct inet_sock *inet = inet_sk(sk); + struct net *net = sock_net(sk); - if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) + dev = __dev_get_by_name(net, ifname); + if (!dev) return -ENODEV; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) @@ -1210,30 +1203,33 @@ static int set_mcast_if(struct sock *sk, char *ifname) * Set the maximum length of sync message according to the * specified interface's MTU. 
*/ -static int set_sync_mesg_maxlen(int sync_state) +static int set_sync_mesg_maxlen(struct net *net, int sync_state) { + struct netns_ipvs *ipvs = net_ipvs(net); struct net_device *dev; int num; if (sync_state == IP_VS_STATE_MASTER) { - if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL) + dev = __dev_get_by_name(net, ipvs->master_mcast_ifn); + if (!dev) return -ENODEV; num = (dev->mtu - sizeof(struct iphdr) - sizeof(struct udphdr) - SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE; - sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN + + ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN + SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF); IP_VS_DBG(7, "setting the maximum length of sync sending " - "message %d.\n", sync_send_mesg_maxlen); + "message %d.\n", ipvs->send_mesg_maxlen); } else if (sync_state == IP_VS_STATE_BACKUP) { - if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL) + dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn); + if (!dev) return -ENODEV; - sync_recv_mesg_maxlen = dev->mtu - + ipvs->recv_mesg_maxlen = dev->mtu - sizeof(struct iphdr) - sizeof(struct udphdr); IP_VS_DBG(7, "setting the maximum length of sync receiving " - "message %d.\n", sync_recv_mesg_maxlen); + "message %d.\n", ipvs->recv_mesg_maxlen); } return 0; @@ -1248,6 +1244,7 @@ static int set_sync_mesg_maxlen(int sync_state) static int join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) { + struct net *net = sock_net(sk); struct ip_mreqn mreq; struct net_device *dev; int ret; @@ -1255,7 +1252,8 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) memset(&mreq, 0, sizeof(mreq)); memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); - if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) + dev = __dev_get_by_name(net, ifname); + if (!dev) return -ENODEV; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) return -EINVAL; @@ -1272,11 +1270,13 @@ join_mcast_group(struct sock *sk, struct in_addr 
*addr, char *ifname) static int bind_mcastif_addr(struct socket *sock, char *ifname) { + struct net *net = sock_net(sock->sk); struct net_device *dev; __be32 addr; struct sockaddr_in sin; - if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) + dev = __dev_get_by_name(net, ifname); + if (!dev) return -ENODEV; addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); @@ -1298,8 +1298,9 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname) /* * Set up sending multicast socket over UDP */ -static struct socket * make_send_sock(void) +static struct socket *make_send_sock(struct net *net) { + struct netns_ipvs *ipvs = net_ipvs(net); struct socket *sock; int result; @@ -1310,7 +1311,7 @@ static struct socket * make_send_sock(void) return ERR_PTR(result); } - result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn); + result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn); if (result < 0) { pr_err("Error setting outbound mcast interface\n"); goto error; @@ -1319,7 +1320,7 @@ static struct socket * make_send_sock(void) set_mcast_loop(sock->sk, 0); set_mcast_ttl(sock->sk, 1); - result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn); + result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn); if (result < 0) { pr_err("Error binding address of the mcast interface\n"); goto error; @@ -1343,8 +1344,9 @@ static struct socket * make_send_sock(void) /* * Set up receiving multicast socket over UDP */ -static struct socket * make_receive_sock(void) +static struct socket *make_receive_sock(struct net *net) { + struct netns_ipvs *ipvs = net_ipvs(net); struct socket *sock; int result; @@ -1368,7 +1370,7 @@ static struct socket * make_receive_sock(void) /* join the multicast group */ result = join_mcast_group(sock->sk, (struct in_addr *) &mcast_addr.sin_addr, - ip_vs_backup_mcast_ifn); + ipvs->backup_mcast_ifn); if (result < 0) { pr_err("Error joining to the multicast group\n"); goto error; @@ -1439,20 +1441,21 @@ ip_vs_receive(struct socket *sock, char *buffer, const 
size_t buflen) static int sync_thread_master(void *data) { struct ip_vs_sync_thread_data *tinfo = data; + struct netns_ipvs *ipvs = net_ipvs(tinfo->net); struct ip_vs_sync_buff *sb; pr_info("sync thread started: state = MASTER, mcast_ifn = %s, " "syncid = %d\n", - ip_vs_master_mcast_ifn, ip_vs_master_syncid); + ipvs->master_mcast_ifn, ipvs->master_syncid); while (!kthread_should_stop()) { - while ((sb = sb_dequeue())) { + while ((sb = sb_dequeue(ipvs))) { ip_vs_send_sync_msg(tinfo->sock, sb->mesg); ip_vs_sync_buff_release(sb); } - /* check if entries stay in curr_sb for 2 seconds */ - sb = get_curr_sync_buff(2 * HZ); + /* check if entries stay in ipvs->sync_buff for 2 seconds */ + sb = get_curr_sync_buff(ipvs, 2 * HZ); if (sb) { ip_vs_send_sync_msg(tinfo->sock, sb->mesg); ip_vs_sync_buff_release(sb); @@ -1462,14 +1465,13 @@ static int sync_thread_master(void *data) } /* clean up the sync_buff queue */ - while ((sb=sb_dequeue())) { + while ((sb = sb_dequeue(ipvs))) ip_vs_sync_buff_release(sb); - } /* clean up the current sync_buff */ - if ((sb = get_curr_sync_buff(0))) { + sb = get_curr_sync_buff(ipvs, 0); + if (sb) ip_vs_sync_buff_release(sb); - } /* release the sending multicast socket */ sock_release(tinfo->sock); @@ -1482,11 +1484,12 @@ static int sync_thread_master(void *data) static int sync_thread_backup(void *data) { struct ip_vs_sync_thread_data *tinfo = data; + struct netns_ipvs *ipvs = net_ipvs(tinfo->net); int len; pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " "syncid = %d\n", - ip_vs_backup_mcast_ifn, ip_vs_backup_syncid); + ipvs->backup_mcast_ifn, ipvs->backup_syncid); while (!kthread_should_stop()) { wait_event_interruptible(*sk_sleep(tinfo->sock->sk), @@ -1496,7 +1499,7 @@ static int sync_thread_backup(void *data) /* do we have data now? 
*/ while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) { len = ip_vs_receive(tinfo->sock, tinfo->buf, - sync_recv_mesg_maxlen); + ipvs->recv_mesg_maxlen); if (len <= 0) { pr_err("receiving message error\n"); break; @@ -1505,7 +1508,7 @@ static int sync_thread_backup(void *data) /* disable bottom half, because it accesses the data shared by softirq while getting/creating conns */ local_bh_disable(); - ip_vs_process_message(&init_net, tinfo->buf, len); + ip_vs_process_message(tinfo->net, tinfo->buf, len); local_bh_enable(); } } @@ -1519,11 +1522,12 @@ static int sync_thread_backup(void *data) } -int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) +int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) { struct ip_vs_sync_thread_data *tinfo; struct task_struct **realtask, *task; struct socket *sock; + struct netns_ipvs *ipvs = net_ipvs(net); char *name, *buf = NULL; int (*threadfn)(void *data); int result = -ENOMEM; @@ -1533,27 +1537,27 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) sizeof(struct ip_vs_sync_conn_v0)); if (state == IP_VS_STATE_MASTER) { - if (sync_master_thread) + if (ipvs->master_thread) return -EEXIST; - strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, - sizeof(ip_vs_master_mcast_ifn)); - ip_vs_master_syncid = syncid; - realtask = &sync_master_thread; - name = "ipvs_syncmaster"; + strlcpy(ipvs->master_mcast_ifn, mcast_ifn, + sizeof(ipvs->master_mcast_ifn)); + ipvs->master_syncid = syncid; + realtask = &ipvs->master_thread; + name = "ipvs_master:%d"; threadfn = sync_thread_master; - sock = make_send_sock(); + sock = make_send_sock(net); } else if (state == IP_VS_STATE_BACKUP) { - if (sync_backup_thread) + if (ipvs->backup_thread) return -EEXIST; - strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn, - sizeof(ip_vs_backup_mcast_ifn)); - ip_vs_backup_syncid = syncid; - realtask = &sync_backup_thread; - name = "ipvs_syncbackup"; + strlcpy(ipvs->backup_mcast_ifn, mcast_ifn, + 
sizeof(ipvs->backup_mcast_ifn)); + ipvs->backup_syncid = syncid; + realtask = &ipvs->backup_thread; + name = "ipvs_backup:%d"; threadfn = sync_thread_backup; - sock = make_receive_sock(); + sock = make_receive_sock(net); } else { return -EINVAL; } @@ -1563,9 +1567,9 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) goto out; } - set_sync_mesg_maxlen(state); + set_sync_mesg_maxlen(net, state); if (state == IP_VS_STATE_BACKUP) { - buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL); + buf = kmalloc(ipvs->recv_mesg_maxlen, GFP_KERNEL); if (!buf) goto outsocket; } @@ -1574,10 +1578,11 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) if (!tinfo) goto outbuf; + tinfo->net = net; tinfo->sock = sock; tinfo->buf = buf; - task = kthread_run(threadfn, tinfo, name); + task = kthread_run(threadfn, tinfo, name, ipvs->gen); if (IS_ERR(task)) { result = PTR_ERR(task); goto outtinfo; @@ -1585,7 +1590,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) /* mark as active */ *realtask = task; - ip_vs_sync_state |= state; + ipvs->sync_state |= state; /* increase the module use count */ ip_vs_use_count_inc(); @@ -1603,16 +1608,18 @@ out: } -int stop_sync_thread(int state) +int stop_sync_thread(struct net *net, int state) { + struct netns_ipvs *ipvs = net_ipvs(net); + IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); if (state == IP_VS_STATE_MASTER) { - if (!sync_master_thread) + if (!ipvs->master_thread) return -ESRCH; pr_info("stopping master sync thread %d ...\n", - task_pid_nr(sync_master_thread)); + task_pid_nr(ipvs->master_thread)); /* * The lock synchronizes with sb_queue_tail(), so that we don't @@ -1620,21 +1627,21 @@ int stop_sync_thread(int state) * progress of stopping the master sync daemon. 
*/ - spin_lock_bh(&ip_vs_sync_lock); - ip_vs_sync_state &= ~IP_VS_STATE_MASTER; - spin_unlock_bh(&ip_vs_sync_lock); - kthread_stop(sync_master_thread); - sync_master_thread = NULL; + spin_lock_bh(&ipvs->sync_lock); + ipvs->sync_state &= ~IP_VS_STATE_MASTER; + spin_unlock_bh(&ipvs->sync_lock); + kthread_stop(ipvs->master_thread); + ipvs->master_thread = NULL; } else if (state == IP_VS_STATE_BACKUP) { - if (!sync_backup_thread) + if (!ipvs->backup_thread) return -ESRCH; pr_info("stopping backup sync thread %d ...\n", - task_pid_nr(sync_backup_thread)); + task_pid_nr(ipvs->backup_thread)); - ip_vs_sync_state &= ~IP_VS_STATE_BACKUP; - kthread_stop(sync_backup_thread); - sync_backup_thread = NULL; + ipvs->sync_state &= ~IP_VS_STATE_BACKUP; + kthread_stop(ipvs->backup_thread); + ipvs->backup_thread = NULL; } else { return -EINVAL; } @@ -1650,12 +1657,29 @@ int stop_sync_thread(int state) */ static int __net_init __ip_vs_sync_init(struct net *net) { + struct netns_ipvs *ipvs = net_ipvs(net); + + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return -EPERM; + + INIT_LIST_HEAD(&ipvs->sync_queue); + spin_lock_init(&ipvs->sync_lock); + spin_lock_init(&ipvs->sync_buff_lock); + + ipvs->sync_mcast_addr.sin_family = AF_INET; + ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT); + ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP); return 0; } static void __ip_vs_sync_cleanup(struct net *net) { + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return; + stop_sync_thread(net, IP_VS_STATE_MASTER); + stop_sync_thread(net, IP_VS_STATE_BACKUP); } + static struct pernet_operations ipvs_sync_ops = { .init = __ip_vs_sync_init, .exit = __ip_vs_sync_cleanup, -- cgit v1.1 From b17fc9963f837ef1acfe36e193108fb16ed58647 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:56 +0100 Subject: IPVS: netns, ip_vs_stats and its procfs The statistic counter locks for every packet are now removed, and that statistic is now per 
CPU, i.e. no locks needed. However summing is made in ip_vs_est into ip_vs_stats struct which is moved to ipvs struct. procfs, ip_vs_stats now has a "per cpu" count and a grand total. A new function seq_file_single_net() in ip_vs.h created for handling of single_open_net() since it does not place net ptr in a struct, like others. /var/lib/lxc # cat /proc/net/ip_vs_stats_percpu Total Incoming Outgoing Incoming Outgoing CPU Conns Packets Packets Bytes Bytes 0 0 3 1 9D 34 1 0 1 2 49 70 2 0 1 2 34 76 3 1 2 2 70 74 ~ 1 7 7 18A 18E Conns/s Pkts/s Pkts/s Bytes/s Bytes/s 0 0 0 0 0 *v3 ip_vs_stats remains as before, instead ip_vs_stats_percpu is added. u64 seq lock added *v4 Bug correction inbytes and outbytes as own vars. per_cpu counter for all stats now as suggested by Julian. [horms@verge.net.au: removed whitespace-change-only hunk] Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 89 ++++++++++++++------------ net/netfilter/ipvs/ip_vs_ctl.c | 134 ++++++++++++++++++++++++++++++++++------ net/netfilter/ipvs/ip_vs_est.c | 39 ++++++++++++ 3 files changed, 204 insertions(+), 58 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 5531d56..7e6a2a0 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -115,21 +115,28 @@ static inline void ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) { struct ip_vs_dest *dest = cp->dest; + struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); + if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { - spin_lock(&dest->stats.lock); - dest->stats.ustats.inpkts++; - dest->stats.ustats.inbytes += skb->len; - spin_unlock(&dest->stats.lock); - - spin_lock(&dest->svc->stats.lock); - dest->svc->stats.ustats.inpkts++; - dest->svc->stats.ustats.inbytes += skb->len; - spin_unlock(&dest->svc->stats.lock); - - spin_lock(&ip_vs_stats.lock); - ip_vs_stats.ustats.inpkts++; -
ip_vs_stats.ustats.inbytes += skb->len; - spin_unlock(&ip_vs_stats.lock); + struct ip_vs_cpu_stats *s; + + s = this_cpu_ptr(dest->stats.cpustats); + s->ustats.inpkts++; + u64_stats_update_begin(&s->syncp); + s->ustats.inbytes += skb->len; + u64_stats_update_end(&s->syncp); + + s = this_cpu_ptr(dest->svc->stats.cpustats); + s->ustats.inpkts++; + u64_stats_update_begin(&s->syncp); + s->ustats.inbytes += skb->len; + u64_stats_update_end(&s->syncp); + + s = this_cpu_ptr(ipvs->cpustats); + s->ustats.inpkts++; + u64_stats_update_begin(&s->syncp); + s->ustats.inbytes += skb->len; + u64_stats_update_end(&s->syncp); } } @@ -138,21 +145,28 @@ static inline void ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) { struct ip_vs_dest *dest = cp->dest; + struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); + if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { - spin_lock(&dest->stats.lock); - dest->stats.ustats.outpkts++; - dest->stats.ustats.outbytes += skb->len; - spin_unlock(&dest->stats.lock); - - spin_lock(&dest->svc->stats.lock); - dest->svc->stats.ustats.outpkts++; - dest->svc->stats.ustats.outbytes += skb->len; - spin_unlock(&dest->svc->stats.lock); - - spin_lock(&ip_vs_stats.lock); - ip_vs_stats.ustats.outpkts++; - ip_vs_stats.ustats.outbytes += skb->len; - spin_unlock(&ip_vs_stats.lock); + struct ip_vs_cpu_stats *s; + + s = this_cpu_ptr(dest->stats.cpustats); + s->ustats.outpkts++; + u64_stats_update_begin(&s->syncp); + s->ustats.outbytes += skb->len; + u64_stats_update_end(&s->syncp); + + s = this_cpu_ptr(dest->svc->stats.cpustats); + s->ustats.outpkts++; + u64_stats_update_begin(&s->syncp); + s->ustats.outbytes += skb->len; + u64_stats_update_end(&s->syncp); + + s = this_cpu_ptr(ipvs->cpustats); + s->ustats.outpkts++; + u64_stats_update_begin(&s->syncp); + s->ustats.outbytes += skb->len; + u64_stats_update_end(&s->syncp); } } @@ -160,17 +174,17 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) static inline void ip_vs_conn_stats(struct ip_vs_conn 
*cp, struct ip_vs_service *svc) { - spin_lock(&cp->dest->stats.lock); - cp->dest->stats.ustats.conns++; - spin_unlock(&cp->dest->stats.lock); + struct netns_ipvs *ipvs = net_ipvs(svc->net); + struct ip_vs_cpu_stats *s; - spin_lock(&svc->stats.lock); - svc->stats.ustats.conns++; - spin_unlock(&svc->stats.lock); + s = this_cpu_ptr(cp->dest->stats.cpustats); + s->ustats.conns++; - spin_lock(&ip_vs_stats.lock); - ip_vs_stats.ustats.conns++; - spin_unlock(&ip_vs_stats.lock); + s = this_cpu_ptr(svc->stats.cpustats); + s->ustats.conns++; + + s = this_cpu_ptr(ipvs->cpustats); + s->ustats.conns++; } @@ -1841,7 +1855,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { }, #endif }; - /* * Initialize IP Virtual Server netns mem. */ diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 03f8631..cbd58c6 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -257,8 +257,7 @@ static DECLARE_DELAYED_WORK(defense_work, defense_work_handler); static void defense_work_handler(struct work_struct *work) { - struct net *net = &init_net; - struct netns_ipvs *ipvs = net_ipvs(net); + struct netns_ipvs *ipvs = net_ipvs(&init_net); update_defense_level(ipvs); if (atomic_read(&ip_vs_dropentry)) @@ -519,6 +518,7 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest) svc->fwmark, IP_VS_DBG_ADDR(svc->af, &svc->addr), ntohs(svc->port), atomic_read(&svc->usecnt)); + free_percpu(svc->stats.cpustats); kfree(svc); } } @@ -722,6 +722,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, list_del(&dest->n_list); ip_vs_dst_reset(dest); __ip_vs_unbind_svc(dest); + free_percpu(dest->stats.cpustats); kfree(dest); } } @@ -747,6 +748,7 @@ static void ip_vs_trash_cleanup(void) list_del(&dest->n_list); ip_vs_dst_reset(dest); __ip_vs_unbind_svc(dest); + free_percpu(dest->stats.cpustats); kfree(dest); } } @@ -868,6 +870,11 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, pr_err("%s(): no 
memory.\n", __func__); return -ENOMEM; } + dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); + if (!dest->stats.cpustats) { + pr_err("%s() alloc_percpu failed\n", __func__); + goto err_alloc; + } dest->af = svc->af; dest->protocol = svc->protocol; @@ -891,6 +898,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, LeaveFunction(2); return 0; + +err_alloc: + kfree(dest); + return -ENOMEM; } @@ -1037,6 +1048,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) and only one user context can update virtual service at a time, so the operation here is OK */ atomic_dec(&dest->svc->refcnt); + free_percpu(dest->stats.cpustats); kfree(dest); } else { IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, " @@ -1163,6 +1175,11 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, ret = -ENOMEM; goto out_err; } + svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); + if (!svc->stats.cpustats) { + pr_err("%s() alloc_percpu failed\n", __func__); + goto out_err; + } /* I'm the first user of the service */ atomic_set(&svc->usecnt, 0); @@ -1212,6 +1229,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, *svc_p = svc; return 0; + out_err: if (svc != NULL) { ip_vs_unbind_scheduler(svc); @@ -1220,6 +1238,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, ip_vs_app_inc_put(svc->inc); local_bh_enable(); } + if (svc->stats.cpustats) + free_percpu(svc->stats.cpustats); kfree(svc); } ip_vs_scheduler_put(sched); @@ -1388,6 +1408,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) svc->fwmark, IP_VS_DBG_ADDR(svc->af, &svc->addr), ntohs(svc->port), atomic_read(&svc->usecnt)); + free_percpu(svc->stats.cpustats); kfree(svc); } @@ -1499,7 +1520,7 @@ static int ip_vs_zero_all(struct net *net) } } - ip_vs_zero_stats(&ip_vs_stats); + ip_vs_zero_stats(net_ipvs(net)->tot_stats); return 0; } @@ -1989,13 +2010,11 @@ static const struct file_operations 
ip_vs_info_fops = { #endif -struct ip_vs_stats ip_vs_stats = { - .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock), -}; - #ifdef CONFIG_PROC_FS static int ip_vs_stats_show(struct seq_file *seq, void *v) { + struct net *net = seq_file_single_net(seq); + struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats; /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, @@ -2003,22 +2022,22 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v) seq_printf(seq, " Conns Packets Packets Bytes Bytes\n"); - spin_lock_bh(&ip_vs_stats.lock); - seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns, - ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts, - (unsigned long long) ip_vs_stats.ustats.inbytes, - (unsigned long long) ip_vs_stats.ustats.outbytes); + spin_lock_bh(&tot_stats->lock); + seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns, + tot_stats->ustats.inpkts, tot_stats->ustats.outpkts, + (unsigned long long) tot_stats->ustats.inbytes, + (unsigned long long) tot_stats->ustats.outbytes); /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); seq_printf(seq,"%8X %8X %8X %16X %16X\n", - ip_vs_stats.ustats.cps, - ip_vs_stats.ustats.inpps, - ip_vs_stats.ustats.outpps, - ip_vs_stats.ustats.inbps, - ip_vs_stats.ustats.outbps); - spin_unlock_bh(&ip_vs_stats.lock); + tot_stats->ustats.cps, + tot_stats->ustats.inpps, + tot_stats->ustats.outpps, + tot_stats->ustats.inbps, + tot_stats->ustats.outbps); + spin_unlock_bh(&tot_stats->lock); return 0; } @@ -2036,6 +2055,59 @@ static const struct file_operations ip_vs_stats_fops = { .release = single_release, }; +static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) +{ + struct net *net = seq_file_single_net(seq); + struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats; + int i; + +/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ + seq_puts(seq, + " Total Incoming 
Outgoing Incoming Outgoing\n"); + seq_printf(seq, + "CPU Conns Packets Packets Bytes Bytes\n"); + + for_each_possible_cpu(i) { + struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i); + seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n", + i, u->ustats.conns, u->ustats.inpkts, + u->ustats.outpkts, (__u64)u->ustats.inbytes, + (__u64)u->ustats.outbytes); + } + + spin_lock_bh(&tot_stats->lock); + seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n", + tot_stats->ustats.conns, tot_stats->ustats.inpkts, + tot_stats->ustats.outpkts, + (unsigned long long) tot_stats->ustats.inbytes, + (unsigned long long) tot_stats->ustats.outbytes); + +/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ + seq_puts(seq, + " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); + seq_printf(seq, " %8X %8X %8X %16X %16X\n", + tot_stats->ustats.cps, + tot_stats->ustats.inpps, + tot_stats->ustats.outpps, + tot_stats->ustats.inbps, + tot_stats->ustats.outbps); + spin_unlock_bh(&tot_stats->lock); + + return 0; +} + +static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file) +{ + return single_open_net(inode, file, ip_vs_stats_percpu_show); +} + +static const struct file_operations ip_vs_stats_percpu_fops = { + .owner = THIS_MODULE, + .open = ip_vs_stats_percpu_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif /* @@ -3461,32 +3533,54 @@ int __net_init __ip_vs_control_init(struct net *net) if (!net_eq(net, &init_net)) /* netns not enabled yet */ return -EPERM; + /* procfs stats */ + ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL); + if (ipvs->tot_stats == NULL) { + pr_err("%s(): no memory.\n", __func__); + return -ENOMEM; + } + ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats); + if (!ipvs->cpustats) { + pr_err("%s() alloc_percpu failed\n", __func__); + goto err_alloc; + } + spin_lock_init(&ipvs->tot_stats->lock); for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) INIT_LIST_HEAD(&ipvs->rs_table[idx]); 
proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops); proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); + proc_net_fops_create(net, "ip_vs_stats_percpu", 0, + &ip_vs_stats_percpu_fops); sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path, vs_vars); if (sysctl_header == NULL) goto err_reg; - ip_vs_new_estimator(net, &ip_vs_stats); + ip_vs_new_estimator(net, ipvs->tot_stats); return 0; err_reg: + free_percpu(ipvs->cpustats); +err_alloc: + kfree(ipvs->tot_stats); return -ENOMEM; } static void __net_exit __ip_vs_control_cleanup(struct net *net) { + struct netns_ipvs *ipvs = net_ipvs(net); + if (!net_eq(net, &init_net)) /* netns not enabled yet */ return; - ip_vs_kill_estimator(net, &ip_vs_stats); + ip_vs_kill_estimator(net, ipvs->tot_stats); unregister_net_sysctl_table(sysctl_header); + proc_net_remove(net, "ip_vs_stats_percpu"); proc_net_remove(net, "ip_vs_stats"); proc_net_remove(net, "ip_vs"); + free_percpu(ipvs->cpustats); + kfree(ipvs->tot_stats); } static struct pernet_operations ipvs_control_ops = { diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 07d839b..d13616b 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -52,6 +52,43 @@ */ +/* + * Make a summary from each cpu + */ +static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, + struct ip_vs_cpu_stats *stats) +{ + int i; + + for_each_possible_cpu(i) { + struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i); + unsigned int start; + __u64 inbytes, outbytes; + if (i) { + sum->conns += s->ustats.conns; + sum->inpkts += s->ustats.inpkts; + sum->outpkts += s->ustats.outpkts; + do { + start = u64_stats_fetch_begin_bh(&s->syncp); + inbytes = s->ustats.inbytes; + outbytes = s->ustats.outbytes; + } while (u64_stats_fetch_retry_bh(&s->syncp, start)); + sum->inbytes += inbytes; + sum->outbytes += outbytes; + } else { + sum->conns = s->ustats.conns; + sum->inpkts = s->ustats.inpkts; + sum->outpkts = s->ustats.outpkts; + do { 
+ start = u64_stats_fetch_begin_bh(&s->syncp); + sum->inbytes = s->ustats.inbytes; + sum->outbytes = s->ustats.outbytes; + } while (u64_stats_fetch_retry_bh(&s->syncp, start)); + } + } +} + + static void estimation_timer(unsigned long arg) { struct ip_vs_estimator *e; @@ -64,10 +101,12 @@ static void estimation_timer(unsigned long arg) struct netns_ipvs *ipvs; ipvs = net_ipvs(net); + ip_vs_read_cpu_stats(&ipvs->tot_stats->ustats, ipvs->cpustats); spin_lock(&ipvs->est_lock); list_for_each_entry(e, &ipvs->est_list, list) { s = container_of(e, struct ip_vs_stats, est); + ip_vs_read_cpu_stats(&s->ustats, s->cpustats); spin_lock(&s->lock); n_conns = s->ustats.conns; n_inpkts = s->ustats.inpkts; -- cgit v1.1 From 6e67e586e7289c144d5a189d6e0fa7141d025746 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:57 +0100 Subject: IPVS: netns, connection hash got net as param. Connection hash table is now name space aware. i.e. net ptr >> 8 is xor:ed to the hash, and this is the first param to be compared. The net struct is 0xa40 in size ( a little bit smaller for 32 bit arch:s) and cache-line aligned, so a ptr >> 5 might be a more clever solution ? All lookups where net is compared uses net_eq() which returns 1 when netns is disabled, and the compiler seems to do something clever in that case. ip_vs_conn_fill_param() have *net as first param now. Three new inlines added to keep conn struct smaller when names space is disabled. 
- ip_vs_conn_net() - ip_vs_conn_net_set() - ip_vs_conn_net_eq() *v3 moved net compare to the end in "fast path" Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_conn.c | 112 ++++++++++++++++++++------------ net/netfilter/ipvs/ip_vs_core.c | 15 +++-- net/netfilter/ipvs/ip_vs_ftp.c | 14 ++-- net/netfilter/ipvs/ip_vs_nfct.c | 6 +- net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 15 +++-- net/netfilter/ipvs/ip_vs_proto_sctp.c | 2 +- net/netfilter/ipvs/ip_vs_proto_tcp.c | 2 +- net/netfilter/ipvs/ip_vs_proto_udp.c | 2 +- net/netfilter/ipvs/ip_vs_sync.c | 13 ++-- 9 files changed, 109 insertions(+), 72 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index b2024c9..0d5e4fe 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -64,9 +64,6 @@ static struct list_head *ip_vs_conn_tab __read_mostly; /* SLAB cache for IPVS connections */ static struct kmem_cache *ip_vs_conn_cachep __read_mostly; -/* counter for current IPVS connections */ -static atomic_t ip_vs_conn_count = ATOMIC_INIT(0); - /* counter for no client port connections */ static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0); @@ -76,7 +73,7 @@ static unsigned int ip_vs_conn_rnd __read_mostly; /* * Fine locking granularity for big connection hash table */ -#define CT_LOCKARRAY_BITS 4 +#define CT_LOCKARRAY_BITS 5 #define CT_LOCKARRAY_SIZE (1<>8)) & ip_vs_conn_tab_mask; #endif - return jhash_3words((__force u32)addr->ip, (__force u32)port, proto, - ip_vs_conn_rnd) - & ip_vs_conn_tab_mask; + return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto, + ip_vs_conn_rnd) ^ + ((size_t)net>>8)) & ip_vs_conn_tab_mask; } static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, @@ -166,15 +163,15 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, port = p->vport; } - return ip_vs_conn_hashkey(p->af, 
p->protocol, addr, port); + return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port); } static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport, - NULL, 0, &p); + ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol, + &cp->caddr, cp->cport, NULL, 0, &p); if (cp->pe) { p.pe = cp->pe; @@ -186,7 +183,7 @@ static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) } /* - * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port. + * Hashes ip_vs_conn in ip_vs_conn_tab by netns,proto,addr,port. * returns bool success. */ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) @@ -269,11 +266,12 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->af == p->af && + p->cport == cp->cport && p->vport == cp->vport && ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) && - p->cport == cp->cport && p->vport == cp->vport && ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && - p->protocol == cp->protocol) { + p->protocol == cp->protocol && + ip_vs_conn_net_eq(cp, p->net)) { /* HIT */ atomic_inc(&cp->refcnt); ct_read_unlock(hash); @@ -313,17 +311,18 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, struct ip_vs_conn_param *p) { __be16 _ports[2], *pptr; + struct net *net = skb_net(skb); pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); if (pptr == NULL) return 1; if (likely(!inverse)) - ip_vs_conn_fill_param(af, iph->protocol, &iph->saddr, pptr[0], - &iph->daddr, pptr[1], p); + ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr, + pptr[0], &iph->daddr, pptr[1], p); else - ip_vs_conn_fill_param(af, iph->protocol, &iph->daddr, pptr[1], - &iph->saddr, pptr[0], p); + ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr, + pptr[1], &iph->saddr, pptr[0], p); return 0; } @@ -352,6 
+351,8 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) ct_read_lock(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { + if (!ip_vs_conn_net_eq(cp, p->net)) + continue; if (p->pe_data && p->pe->ct_match) { if (p->pe == cp->pe && p->pe->ct_match(p, cp)) goto out; @@ -403,10 +404,11 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->af == p->af && + p->vport == cp->cport && p->cport == cp->dport && ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && - p->vport == cp->cport && p->cport == cp->dport && - p->protocol == cp->protocol) { + p->protocol == cp->protocol && + ip_vs_conn_net_eq(cp, p->net)) { /* HIT */ atomic_inc(&cp->refcnt); ret = cp; @@ -609,8 +611,8 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) struct ip_vs_dest *dest; if ((cp) && (!cp->dest)) { - dest = ip_vs_find_dest(&init_net, cp->af, &cp->daddr, cp->dport, - &cp->vaddr, cp->vport, + dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr, + cp->dport, &cp->vaddr, cp->vport, cp->protocol, cp->fwmark); ip_vs_bind_dest(cp, dest); return dest; @@ -728,6 +730,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct) static void ip_vs_conn_expire(unsigned long data) { struct ip_vs_conn *cp = (struct ip_vs_conn *)data; + struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); cp->timeout = 60*HZ; @@ -770,7 +773,7 @@ static void ip_vs_conn_expire(unsigned long data) ip_vs_unbind_dest(cp); if (cp->flags & IP_VS_CONN_F_NO_CPORT) atomic_dec(&ip_vs_conn_no_cport_cnt); - atomic_dec(&ip_vs_conn_count); + atomic_dec(&ipvs->conn_count); kmem_cache_free(ip_vs_conn_cachep, cp); return; @@ -804,7 +807,9 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, struct ip_vs_dest *dest, __u32 fwmark) { struct ip_vs_conn *cp; - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, p->protocol); + struct netns_ipvs *ipvs = 
net_ipvs(p->net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net, + p->protocol); cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); if (cp == NULL) { @@ -814,6 +819,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, INIT_LIST_HEAD(&cp->c_list); setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); + ip_vs_conn_net_set(cp, p->net); cp->af = p->af; cp->protocol = p->protocol; ip_vs_addr_copy(p->af, &cp->caddr, p->caddr); @@ -844,7 +850,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, atomic_set(&cp->n_control, 0); atomic_set(&cp->in_pkts, 0); - atomic_inc(&ip_vs_conn_count); + atomic_inc(&ipvs->conn_count); if (flags & IP_VS_CONN_F_NO_CPORT) atomic_inc(&ip_vs_conn_no_cport_cnt); @@ -886,17 +892,22 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, * /proc/net/ip_vs_conn entries */ #ifdef CONFIG_PROC_FS +struct ip_vs_iter_state { + struct seq_net_private p; + struct list_head *l; +}; static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) { int idx; struct ip_vs_conn *cp; + struct ip_vs_iter_state *iter = seq->private; for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { ct_read_lock_bh(idx); list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { if (pos-- == 0) { - seq->private = &ip_vs_conn_tab[idx]; + iter->l = &ip_vs_conn_tab[idx]; return cp; } } @@ -908,14 +919,17 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos) { - seq->private = NULL; + struct ip_vs_iter_state *iter = seq->private; + + iter->l = NULL; return *pos ? 
ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN; } static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ip_vs_conn *cp = v; - struct list_head *e, *l = seq->private; + struct ip_vs_iter_state *iter = seq->private; + struct list_head *e, *l = iter->l; int idx; ++*pos; @@ -932,18 +946,19 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) while (++idx < ip_vs_conn_tab_size) { ct_read_lock_bh(idx); list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { - seq->private = &ip_vs_conn_tab[idx]; + iter->l = &ip_vs_conn_tab[idx]; return cp; } ct_read_unlock_bh(idx); } - seq->private = NULL; + iter->l = NULL; return NULL; } static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v) { - struct list_head *l = seq->private; + struct ip_vs_iter_state *iter = seq->private; + struct list_head *l = iter->l; if (l) ct_read_unlock_bh(l - ip_vs_conn_tab); @@ -957,9 +972,12 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n"); else { const struct ip_vs_conn *cp = v; + struct net *net = seq_file_net(seq); char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3]; size_t len = 0; + if (!ip_vs_conn_net_eq(cp, net)) + return 0; if (cp->pe_data) { pe_data[0] = ' '; len = strlen(cp->pe->name); @@ -1004,7 +1022,8 @@ static const struct seq_operations ip_vs_conn_seq_ops = { static int ip_vs_conn_open(struct inode *inode, struct file *file) { - return seq_open(file, &ip_vs_conn_seq_ops); + return seq_open_net(inode, file, &ip_vs_conn_seq_ops, + sizeof(struct ip_vs_iter_state)); } static const struct file_operations ip_vs_conn_fops = { @@ -1031,6 +1050,10 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v) "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n"); else { const struct ip_vs_conn *cp = v; + struct net *net = seq_file_net(seq); + + if (!ip_vs_conn_net_eq(cp, net)) + return 0; #ifdef CONFIG_IP_VS_IPV6 if 
(cp->af == AF_INET6) @@ -1067,7 +1090,8 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = { static int ip_vs_conn_sync_open(struct inode *inode, struct file *file) { - return seq_open(file, &ip_vs_conn_sync_seq_ops); + return seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops, + sizeof(struct ip_vs_iter_state)); } static const struct file_operations ip_vs_conn_sync_fops = { @@ -1168,10 +1192,11 @@ void ip_vs_random_dropentry(void) /* * Flush all the connection entries in the ip_vs_conn_tab */ -static void ip_vs_conn_flush(void) +static void ip_vs_conn_flush(struct net *net) { int idx; struct ip_vs_conn *cp; + struct netns_ipvs *ipvs = net_ipvs(net); flush_again: for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { @@ -1181,7 +1206,8 @@ static void ip_vs_conn_flush(void) ct_write_lock_bh(idx); list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { - + if (!ip_vs_conn_net_eq(cp, net)) + continue; IP_VS_DBG(4, "del connection\n"); ip_vs_conn_expire_now(cp); if (cp->control) { @@ -1194,7 +1220,7 @@ static void ip_vs_conn_flush(void) /* the counter may be not NULL, because maybe some conn entries are run by slow timer handler or unhashed but still referred */ - if (atomic_read(&ip_vs_conn_count) != 0) { + if (atomic_read(&ipvs->conn_count) != 0) { schedule(); goto flush_again; } @@ -1204,8 +1230,11 @@ static void ip_vs_conn_flush(void) */ int __net_init __ip_vs_conn_init(struct net *net) { + struct netns_ipvs *ipvs = net_ipvs(net); + if (!net_eq(net, &init_net)) /* netns not enabled yet */ return -EPERM; + atomic_set(&ipvs->conn_count, 0); proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops); proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops); @@ -1217,6 +1246,8 @@ static void __net_exit __ip_vs_conn_cleanup(struct net *net) if (!net_eq(net, &init_net)) /* netns not enabled yet */ return; + /* flush all the connection entries first */ + ip_vs_conn_flush(net); proc_net_remove(net, "ip_vs_conn"); proc_net_remove(net, 
"ip_vs_conn_sync"); } @@ -1277,9 +1308,6 @@ int __init ip_vs_conn_init(void) void ip_vs_conn_cleanup(void) { unregister_pernet_subsys(&ipvs_conn_ops); - /* flush all the connection entries first */ - ip_vs_conn_flush(); - /* Release the empty cache */ kmem_cache_destroy(ip_vs_conn_cachep); vfree(ip_vs_conn_tab); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 7e6a2a0..7205b49 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -205,7 +205,8 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, const union nf_inet_addr *vaddr, __be16 vport, struct ip_vs_conn_param *p) { - ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p); + ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr, + vport, p); p->pe = svc->pe; if (p->pe && p->pe->fill_param) return p->pe->fill_param(p, skb); @@ -348,8 +349,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* * Create a new connection according to the template */ - ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, src_port, - &iph.daddr, dst_port, ¶m); + ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr, + src_port, &iph.daddr, dst_port, ¶m); cp = ip_vs_conn_new(¶m, &dest->addr, dport, flags, dest, skb->mark); if (cp == NULL) { @@ -464,8 +465,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, */ { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, - pptr[0], &iph.daddr, pptr[1], &p); + + ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, + &iph.saddr, pptr[0], &iph.daddr, pptr[1], + &p); cp = ip_vs_conn_new(&p, &dest->addr, dest->port ? 
dest->port : pptr[1], flags, dest, skb->mark); @@ -532,7 +535,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(svc->af, iph.protocol, + ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr, pptr[0], &iph.daddr, pptr[1], &p); cp = ip_vs_conn_new(&p, &daddr, 0, diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 77b0036..6a04f9a 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -198,13 +198,15 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, */ { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(AF_INET, iph->protocol, - &from, port, &cp->caddr, 0, &p); + ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET, + iph->protocol, &from, port, + &cp->caddr, 0, &p); n_cp = ip_vs_conn_out_get(&p); } if (!n_cp) { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(AF_INET, IPPROTO_TCP, &cp->caddr, + ip_vs_conn_fill_param(ip_vs_conn_net(cp), + AF_INET, IPPROTO_TCP, &cp->caddr, 0, &cp->vaddr, port, &p); n_cp = ip_vs_conn_new(&p, &from, port, IP_VS_CONN_F_NO_CPORT | @@ -361,9 +363,9 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(AF_INET, iph->protocol, &to, port, - &cp->vaddr, htons(ntohs(cp->vport)-1), - &p); + ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET, + iph->protocol, &to, port, &cp->vaddr, + htons(ntohs(cp->vport)-1), &p); n_cp = ip_vs_conn_in_get(&p); if (!n_cp) { n_cp = ip_vs_conn_new(&p, &cp->daddr, diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index 4680647..f454c80 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -141,6 +141,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct, struct nf_conntrack_tuple *orig, new_reply; struct ip_vs_conn *cp; struct ip_vs_conn_param p; + 
struct net *net = nf_ct_net(ct); if (exp->tuple.src.l3num != PF_INET) return; @@ -155,7 +156,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct, /* RS->CLIENT */ orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - ip_vs_conn_fill_param(exp->tuple.src.l3num, orig->dst.protonum, + ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum, &orig->src.u3, orig->src.u.tcp.port, &orig->dst.u3, orig->dst.u.tcp.port, &p); cp = ip_vs_conn_out_get(&p); @@ -268,7 +269,8 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) " for conn " FMT_CONN "\n", __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); - h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple); + h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE, + &tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); /* Show what happens instead of calling nf_ct_kill() */ diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index 28039cb..5b8eb8b 100644 --- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c @@ -41,15 +41,16 @@ struct isakmp_hdr { #define PORT_ISAKMP 500 static void -ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph, - int inverse, struct ip_vs_conn_param *p) +ah_esp_conn_fill_param_proto(struct net *net, int af, + const struct ip_vs_iphdr *iph, int inverse, + struct ip_vs_conn_param *p) { if (likely(!inverse)) - ip_vs_conn_fill_param(af, IPPROTO_UDP, + ip_vs_conn_fill_param(net, af, IPPROTO_UDP, &iph->saddr, htons(PORT_ISAKMP), &iph->daddr, htons(PORT_ISAKMP), p); else - ip_vs_conn_fill_param(af, IPPROTO_UDP, + ip_vs_conn_fill_param(net, af, IPPROTO_UDP, &iph->daddr, htons(PORT_ISAKMP), &iph->saddr, htons(PORT_ISAKMP), p); } @@ -61,8 +62,9 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, { struct ip_vs_conn *cp; struct ip_vs_conn_param p; + struct net *net = skb_net(skb); - ah_esp_conn_fill_param_proto(af, iph, inverse, &p); + ah_esp_conn_fill_param_proto(net, af, iph, 
inverse, &p); cp = ip_vs_conn_in_get(&p); if (!cp) { /* @@ -89,8 +91,9 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb, { struct ip_vs_conn *cp; struct ip_vs_conn_param p; + struct net *net = skb_net(skb); - ah_esp_conn_fill_param_proto(af, iph, inverse, &p); + ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p); cp = ip_vs_conn_out_get(&p); if (!cp) { IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 569e77b..550365a 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -1055,7 +1055,7 @@ static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) static int sctp_app_conn_bind(struct ip_vs_conn *cp) { - struct netns_ipvs *ipvs = net_ipvs(&init_net); + struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); int hash; struct ip_vs_app *inc; int result = 0; diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 757aaaf..d8b3f9f 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -620,7 +620,7 @@ tcp_unregister_app(struct net *net, struct ip_vs_app *inc) static int tcp_app_conn_bind(struct ip_vs_conn *cp) { - struct netns_ipvs *ipvs = net_ipvs(&init_net); + struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); int hash; struct ip_vs_app *inc; int result = 0; diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 1dc3941..581157b 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -396,7 +396,7 @@ udp_unregister_app(struct net *net, struct ip_vs_app *inc) static int udp_app_conn_bind(struct ip_vs_conn *cp) { - struct netns_ipvs *ipvs = net_ipvs(&init_net); + struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); int hash; struct ip_vs_app *inc; int result = 0; diff --git a/net/netfilter/ipvs/ip_vs_sync.c 
b/net/netfilter/ipvs/ip_vs_sync.c index c29e73d..f85e47d 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -660,21 +660,21 @@ control: * fill_param used by version 1 */ static inline int -ip_vs_conn_fill_param_sync(int af, union ip_vs_sync_conn *sc, +ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc, struct ip_vs_conn_param *p, __u8 *pe_data, unsigned int pe_data_len, __u8 *pe_name, unsigned int pe_name_len) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) - ip_vs_conn_fill_param(af, sc->v6.protocol, + ip_vs_conn_fill_param(net, af, sc->v6.protocol, (const union nf_inet_addr *)&sc->v6.caddr, sc->v6.cport, (const union nf_inet_addr *)&sc->v6.vaddr, sc->v6.vport, p); else #endif - ip_vs_conn_fill_param(af, sc->v4.protocol, + ip_vs_conn_fill_param(net, af, sc->v4.protocol, (const union nf_inet_addr *)&sc->v4.caddr, sc->v4.cport, (const union nf_inet_addr *)&sc->v4.vaddr, @@ -881,7 +881,7 @@ static void ip_vs_process_message_v0(struct net *net, const char *buffer, } } - ip_vs_conn_fill_param(AF_INET, s->protocol, + ip_vs_conn_fill_param(net, AF_INET, s->protocol, (const union nf_inet_addr *)&s->caddr, s->cport, (const union nf_inet_addr *)&s->vaddr, @@ -1043,9 +1043,8 @@ static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end) state = 0; } } - if (ip_vs_conn_fill_param_sync(af, s, ¶m, - pe_data, pe_data_len, - pe_name, pe_name_len)) { + if (ip_vs_conn_fill_param_sync(net, af, s, ¶m, pe_data, + pe_data_len, pe_name, pe_name_len)) { retc = 50; goto out; } -- cgit v1.1 From a0840e2e165a370ca24a59545e564e9881a55891 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:58 +0100 Subject: IPVS: netns, ip_vs_ctl local vars moved to ipvs struct. Moving global vars to ipvs struct, except for svc table lock. Next patch for ctl will be drop-rate handling. 
*v3 __ip_vs_mutex remains global ip_vs_conntrack_enabled(struct netns_ipvs *ipvs) Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_conn.c | 7 +- net/netfilter/ipvs/ip_vs_core.c | 34 ++-- net/netfilter/ipvs/ip_vs_ctl.c | 291 ++++++++++++++++++---------------- net/netfilter/ipvs/ip_vs_proto_sctp.c | 2 +- net/netfilter/ipvs/ip_vs_proto_tcp.c | 2 +- net/netfilter/ipvs/ip_vs_proto_udp.c | 2 +- net/netfilter/ipvs/ip_vs_sync.c | 9 +- 7 files changed, 184 insertions(+), 163 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 0d5e4fe..5ba205a 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -686,13 +686,14 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) int ip_vs_check_template(struct ip_vs_conn *ct) { struct ip_vs_dest *dest = ct->dest; + struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct)); /* * Checking the dest server status. */ if ((dest == NULL) || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || - (sysctl_ip_vs_expire_quiescent_template && + (ipvs->sysctl_expire_quiescent_template && (atomic_read(&dest->weight) == 0))) { IP_VS_DBG_BUF(9, "check_template: dest not available for " "protocol %s s:%s:%d v:%s:%d " @@ -879,7 +880,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, * IP_VS_CONN_F_ONE_PACKET too. */ - if (ip_vs_conntrack_enabled()) + if (ip_vs_conntrack_enabled(ipvs)) cp->flags |= IP_VS_CONN_F_NFCT; /* Hash it in the ip_vs_conn_tab finally */ @@ -1198,7 +1199,7 @@ static void ip_vs_conn_flush(struct net *net) struct ip_vs_conn *cp; struct netns_ipvs *ipvs = net_ipvs(net); - flush_again: +flush_again: for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { /* * Lock is actually needed in this loop. 
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 7205b49..a7c59a7 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -499,6 +499,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_proto_data *pd) { + struct netns_ipvs *ipvs; __be16 _ports[2], *pptr; struct ip_vs_iphdr iph; int unicast; @@ -521,7 +522,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, /* if it is fwmark-based service, the cache_bypass sysctl is up and the destination is a non-local unicast, then create a cache_bypass connection entry */ - if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) { + ipvs = net_ipvs(skb_net(skb)); + if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) { int ret, cs; struct ip_vs_conn *cp; unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && @@ -733,6 +735,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, unsigned int offset, unsigned int ihl) { + struct netns_ipvs *ipvs; unsigned int verdict = NF_DROP; if (IP_VS_FWD_METHOD(cp) != 0) { @@ -754,6 +757,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, if (!skb_make_writable(skb, offset)) goto out; + ipvs = net_ipvs(skb_net(skb)); + #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) ip_vs_nat_icmp_v6(skb, pp, cp, 1); @@ -763,11 +768,11 @@ static int handle_response_icmp(int af, struct sk_buff *skb, #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0) + if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0) goto out; } else #endif - if ((sysctl_ip_vs_snat_reroute || + if ((ipvs->sysctl_snat_reroute || skb_rtable(skb)->rt_flags & RTCF_LOCAL) && ip_route_me_harder(skb, RTN_LOCAL) != 0) goto out; @@ -979,6 +984,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, int 
ihl) { struct ip_vs_protocol *pp = pd->pp; + struct netns_ipvs *ipvs; IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); @@ -1014,13 +1020,15 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * if it came from this machine itself. So re-compute * the routing information. */ + ipvs = net_ipvs(skb_net(skb)); + #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0) + if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0) goto drop; } else #endif - if ((sysctl_ip_vs_snat_reroute || + if ((ipvs->sysctl_snat_reroute || skb_rtable(skb)->rt_flags & RTCF_LOCAL) && ip_route_me_harder(skb, RTN_LOCAL) != 0) goto drop; @@ -1057,6 +1065,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; + struct netns_ipvs *ipvs; EnterFunction(11); @@ -1131,10 +1140,11 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) * Check if the packet belongs to an existing entry */ cp = pp->conn_out_get(af, skb, &iph, iph.len, 0); + ipvs = net_ipvs(net); if (likely(cp)) return handle_response(af, skb, pd, cp, iph.len); - if (sysctl_ip_vs_nat_icmp_send && + if (ipvs->sysctl_nat_icmp_send && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP || pp->protocol == IPPROTO_SCTP)) { @@ -1580,7 +1590,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ - if (sysctl_ip_vs_expire_nodest_conn) { + if (ipvs->sysctl_expire_nodest_conn) { /* try to expire the connection immediately */ ip_vs_conn_expire_now(cp); } @@ -1610,15 +1620,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) */ if (cp->flags & IP_VS_CONN_F_ONE_PACKET) - pkts = sysctl_ip_vs_sync_threshold[0]; + pkts = ipvs->sysctl_sync_threshold[0]; else pkts = atomic_add_return(1, &cp->in_pkts); if ((ipvs->sync_state & 
IP_VS_STATE_MASTER) && cp->protocol == IPPROTO_SCTP) { if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && - (pkts % sysctl_ip_vs_sync_threshold[1] - == sysctl_ip_vs_sync_threshold[0])) || + (pkts % ipvs->sysctl_sync_threshold[1] + == ipvs->sysctl_sync_threshold[0])) || (cp->old_state != cp->state && ((cp->state == IP_VS_SCTP_S_CLOSED) || (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) || @@ -1632,8 +1642,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) else if ((ipvs->sync_state & IP_VS_STATE_MASTER) && (((cp->protocol != IPPROTO_TCP || cp->state == IP_VS_TCP_S_ESTABLISHED) && - (pkts % sysctl_ip_vs_sync_threshold[1] - == sysctl_ip_vs_sync_threshold[0])) || + (pkts % ipvs->sysctl_sync_threshold[1] + == ipvs->sysctl_sync_threshold[0])) || ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && ((cp->state == IP_VS_TCP_S_FIN_WAIT) || (cp->state == IP_VS_TCP_S_CLOSE) || diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index cbd58c6..183ac18 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -58,42 +58,7 @@ static DEFINE_MUTEX(__ip_vs_mutex); /* lock for service table */ static DEFINE_RWLOCK(__ip_vs_svc_lock); -/* lock for table with the real services */ -static DEFINE_RWLOCK(__ip_vs_rs_lock); - -/* lock for state and timeout tables */ -static DEFINE_SPINLOCK(ip_vs_securetcp_lock); - -/* lock for drop entry handling */ -static DEFINE_SPINLOCK(__ip_vs_dropentry_lock); - -/* lock for drop packet handling */ -static DEFINE_SPINLOCK(__ip_vs_droppacket_lock); - -/* 1/rate drop and drop-entry variables */ -int ip_vs_drop_rate = 0; -int ip_vs_drop_counter = 0; -static atomic_t ip_vs_dropentry = ATOMIC_INIT(0); - -/* number of virtual services */ -static int ip_vs_num_services = 0; - /* sysctl variables */ -static int sysctl_ip_vs_drop_entry = 0; -static int sysctl_ip_vs_drop_packet = 0; -static int sysctl_ip_vs_secure_tcp = 0; -static int sysctl_ip_vs_amemthresh = 1024; -static int 
sysctl_ip_vs_am_droprate = 10; -int sysctl_ip_vs_cache_bypass = 0; -int sysctl_ip_vs_expire_nodest_conn = 0; -int sysctl_ip_vs_expire_quiescent_template = 0; -int sysctl_ip_vs_sync_threshold[2] = { 3, 50 }; -int sysctl_ip_vs_nat_icmp_send = 0; -#ifdef CONFIG_IP_VS_NFCT -int sysctl_ip_vs_conntrack; -#endif -int sysctl_ip_vs_snat_reroute = 1; -int sysctl_ip_vs_sync_ver = 1; /* Default version of sync proto */ #ifdef CONFIG_IP_VS_DEBUG static int sysctl_ip_vs_debug_level = 0; @@ -142,73 +107,73 @@ static void update_defense_level(struct netns_ipvs *ipvs) /* si_swapinfo(&i); */ /* availmem = availmem - (i.totalswap - i.freeswap); */ - nomem = (availmem < sysctl_ip_vs_amemthresh); + nomem = (availmem < ipvs->sysctl_amemthresh); local_bh_disable(); /* drop_entry */ - spin_lock(&__ip_vs_dropentry_lock); - switch (sysctl_ip_vs_drop_entry) { + spin_lock(&ipvs->dropentry_lock); + switch (ipvs->sysctl_drop_entry) { case 0: - atomic_set(&ip_vs_dropentry, 0); + atomic_set(&ipvs->dropentry, 0); break; case 1: if (nomem) { - atomic_set(&ip_vs_dropentry, 1); - sysctl_ip_vs_drop_entry = 2; + atomic_set(&ipvs->dropentry, 1); + ipvs->sysctl_drop_entry = 2; } else { - atomic_set(&ip_vs_dropentry, 0); + atomic_set(&ipvs->dropentry, 0); } break; case 2: if (nomem) { - atomic_set(&ip_vs_dropentry, 1); + atomic_set(&ipvs->dropentry, 1); } else { - atomic_set(&ip_vs_dropentry, 0); - sysctl_ip_vs_drop_entry = 1; + atomic_set(&ipvs->dropentry, 0); + ipvs->sysctl_drop_entry = 1; }; break; case 3: - atomic_set(&ip_vs_dropentry, 1); + atomic_set(&ipvs->dropentry, 1); break; } - spin_unlock(&__ip_vs_dropentry_lock); + spin_unlock(&ipvs->dropentry_lock); /* drop_packet */ - spin_lock(&__ip_vs_droppacket_lock); - switch (sysctl_ip_vs_drop_packet) { + spin_lock(&ipvs->droppacket_lock); + switch (ipvs->sysctl_drop_packet) { case 0: - ip_vs_drop_rate = 0; + ipvs->drop_rate = 0; break; case 1: if (nomem) { - ip_vs_drop_rate = ip_vs_drop_counter - = sysctl_ip_vs_amemthresh / - 
(sysctl_ip_vs_amemthresh-availmem); - sysctl_ip_vs_drop_packet = 2; + ipvs->drop_rate = ipvs->drop_counter + = ipvs->sysctl_amemthresh / + (ipvs->sysctl_amemthresh-availmem); + ipvs->sysctl_drop_packet = 2; } else { - ip_vs_drop_rate = 0; + ipvs->drop_rate = 0; } break; case 2: if (nomem) { - ip_vs_drop_rate = ip_vs_drop_counter - = sysctl_ip_vs_amemthresh / - (sysctl_ip_vs_amemthresh-availmem); + ipvs->drop_rate = ipvs->drop_counter + = ipvs->sysctl_amemthresh / + (ipvs->sysctl_amemthresh-availmem); } else { - ip_vs_drop_rate = 0; - sysctl_ip_vs_drop_packet = 1; + ipvs->drop_rate = 0; + ipvs->sysctl_drop_packet = 1; } break; case 3: - ip_vs_drop_rate = sysctl_ip_vs_am_droprate; + ipvs->drop_rate = ipvs->sysctl_am_droprate; break; } - spin_unlock(&__ip_vs_droppacket_lock); + spin_unlock(&ipvs->droppacket_lock); /* secure_tcp */ - spin_lock(&ip_vs_securetcp_lock); - switch (sysctl_ip_vs_secure_tcp) { + spin_lock(&ipvs->securetcp_lock); + switch (ipvs->sysctl_secure_tcp) { case 0: if (old_secure_tcp >= 2) to_change = 0; @@ -217,7 +182,7 @@ static void update_defense_level(struct netns_ipvs *ipvs) if (nomem) { if (old_secure_tcp < 2) to_change = 1; - sysctl_ip_vs_secure_tcp = 2; + ipvs->sysctl_secure_tcp = 2; } else { if (old_secure_tcp >= 2) to_change = 0; @@ -230,7 +195,7 @@ static void update_defense_level(struct netns_ipvs *ipvs) } else { if (old_secure_tcp >= 2) to_change = 0; - sysctl_ip_vs_secure_tcp = 1; + ipvs->sysctl_secure_tcp = 1; } break; case 3: @@ -238,11 +203,11 @@ static void update_defense_level(struct netns_ipvs *ipvs) to_change = 1; break; } - old_secure_tcp = sysctl_ip_vs_secure_tcp; + old_secure_tcp = ipvs->sysctl_secure_tcp; if (to_change >= 0) ip_vs_protocol_timeout_change(ipvs, - sysctl_ip_vs_secure_tcp > 1); - spin_unlock(&ip_vs_securetcp_lock); + ipvs->sysctl_secure_tcp > 1); + spin_unlock(&ipvs->securetcp_lock); local_bh_enable(); } @@ -260,7 +225,7 @@ static void defense_work_handler(struct work_struct *work) struct netns_ipvs *ipvs = 
net_ipvs(&init_net); update_defense_level(ipvs); - if (atomic_read(&ip_vs_dropentry)) + if (atomic_read(&ipvs->dropentry)) ip_vs_random_dropentry(); schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); @@ -602,7 +567,7 @@ ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, */ hash = ip_vs_rs_hashkey(af, daddr, dport); - read_lock(&__ip_vs_rs_lock); + read_lock(&ipvs->rs_lock); list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) { if ((dest->af == af) && ip_vs_addr_equal(af, &dest->addr, daddr) @@ -610,11 +575,11 @@ ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, && ((dest->protocol == protocol) || dest->vfwmark)) { /* HIT */ - read_unlock(&__ip_vs_rs_lock); + read_unlock(&ipvs->rs_lock); return dest; } } - read_unlock(&__ip_vs_rs_lock); + read_unlock(&ipvs->rs_lock); return NULL; } @@ -788,9 +753,9 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, * Put the real service in rs_table if not present. * For now only for NAT! */ - write_lock_bh(&__ip_vs_rs_lock); + write_lock_bh(&ipvs->rs_lock); ip_vs_rs_hash(ipvs, dest); - write_unlock_bh(&__ip_vs_rs_lock); + write_unlock_bh(&ipvs->rs_lock); } atomic_set(&dest->conn_flags, conn_flags); @@ -1022,14 +987,16 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) */ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) { + struct netns_ipvs *ipvs = net_ipvs(net); + ip_vs_kill_estimator(net, &dest->stats); /* * Remove it from the d-linked list with the real services. 
*/ - write_lock_bh(&__ip_vs_rs_lock); + write_lock_bh(&ipvs->rs_lock); ip_vs_rs_unhash(dest); - write_unlock_bh(&__ip_vs_rs_lock); + write_unlock_bh(&ipvs->rs_lock); /* * Decrease the refcnt of the dest, and free the dest @@ -1092,7 +1059,6 @@ static int ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) { struct ip_vs_dest *dest; - struct net *net = svc->net; __be16 dport = udest->port; EnterFunction(2); @@ -1121,7 +1087,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) /* * Delete the destination */ - __ip_vs_del_dest(net, dest); + __ip_vs_del_dest(svc->net, dest); LeaveFunction(2); @@ -1140,6 +1106,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, struct ip_vs_scheduler *sched = NULL; struct ip_vs_pe *pe = NULL; struct ip_vs_service *svc = NULL; + struct netns_ipvs *ipvs = net_ipvs(net); /* increase the module use count */ ip_vs_use_count_inc(); @@ -1219,7 +1186,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, /* Count only IPv4 services for old get/setsockopt interface */ if (svc->af == AF_INET) - ip_vs_num_services++; + ipvs->num_services++; /* Hash the service into the service table */ write_lock_bh(&__ip_vs_svc_lock); @@ -1359,12 +1326,13 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) struct ip_vs_dest *dest, *nxt; struct ip_vs_scheduler *old_sched; struct ip_vs_pe *old_pe; + struct netns_ipvs *ipvs = net_ipvs(svc->net); pr_info("%s: enter\n", __func__); /* Count only IPv4 services for old get/setsockopt interface */ if (svc->af == AF_INET) - ip_vs_num_services--; + ipvs->num_services--; ip_vs_kill_estimator(svc->net, &svc->stats); @@ -1589,42 +1557,31 @@ proc_do_sync_mode(ctl_table *table, int write, /* * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) + * Do not change order or insert new entries without + * align with netns init in __ip_vs_control_init() */ static struct ctl_table vs_vars[] = { { .procname = "amemthresh", - 
.data = &sysctl_ip_vs_amemthresh, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, -#ifdef CONFIG_IP_VS_DEBUG - { - .procname = "debug_level", - .data = &sysctl_ip_vs_debug_level, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, -#endif { .procname = "am_droprate", - .data = &sysctl_ip_vs_am_droprate, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "drop_entry", - .data = &sysctl_ip_vs_drop_entry, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_do_defense_mode, }, { .procname = "drop_packet", - .data = &sysctl_ip_vs_drop_packet, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_do_defense_mode, @@ -1632,7 +1589,6 @@ static struct ctl_table vs_vars[] = { #ifdef CONFIG_IP_VS_NFCT { .procname = "conntrack", - .data = &sysctl_ip_vs_conntrack, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -1640,25 +1596,62 @@ static struct ctl_table vs_vars[] = { #endif { .procname = "secure_tcp", - .data = &sysctl_ip_vs_secure_tcp, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_do_defense_mode, }, { .procname = "snat_reroute", - .data = &sysctl_ip_vs_snat_reroute, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, }, { .procname = "sync_version", - .data = &sysctl_ip_vs_sync_ver, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_do_sync_mode, }, + { + .procname = "cache_bypass", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "expire_nodest_conn", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "expire_quiescent_template", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "sync_threshold", + .maxlen = + sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold), + .mode = 0644, + .proc_handler = proc_do_sync_threshold, + }, + { + .procname = "nat_icmp_send", + .maxlen = 
sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#ifdef CONFIG_IP_VS_DEBUG + { + .procname = "debug_level", + .data = &sysctl_ip_vs_debug_level, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif #if 0 { .procname = "timeout_established", @@ -1745,41 +1738,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = proc_dointvec_jiffies, }, #endif - { - .procname = "cache_bypass", - .data = &sysctl_ip_vs_cache_bypass, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "expire_nodest_conn", - .data = &sysctl_ip_vs_expire_nodest_conn, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "expire_quiescent_template", - .data = &sysctl_ip_vs_expire_quiescent_template, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "sync_threshold", - .data = &sysctl_ip_vs_sync_threshold, - .maxlen = sizeof(sysctl_ip_vs_sync_threshold), - .mode = 0644, - .proc_handler = proc_do_sync_threshold, - }, - { - .procname = "nat_icmp_send", - .data = &sysctl_ip_vs_nat_icmp_send, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { } }; @@ -1791,8 +1749,6 @@ const struct ctl_path net_vs_ctl_path[] = { }; EXPORT_SYMBOL_GPL(net_vs_ctl_path); -static struct ctl_table_header * sysctl_header; - #ifdef CONFIG_PROC_FS struct ip_vs_iter { @@ -2543,7 +2499,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) struct ip_vs_getinfo info; info.version = IP_VS_VERSION_CODE; info.size = ip_vs_conn_tab_size; - info.num_services = ip_vs_num_services; + info.num_services = ipvs->num_services; if (copy_to_user(user, &info, sizeof(info)) != 0) ret = -EFAULT; } @@ -3014,7 +2970,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, struct ip_vs_service *svc; struct ip_vs_dest *dest; struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; - struct net *net; + struct net *net = 
skb_sknet(skb); mutex_lock(&__ip_vs_mutex); @@ -3023,7 +2979,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy)) goto out_err; - net = skb_sknet(skb); + svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]); if (IS_ERR(svc) || svc == NULL) goto out_err; @@ -3215,8 +3171,10 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) int ret = 0, cmd; int need_full_svc = 0, need_full_dest = 0; struct net *net; + struct netns_ipvs *ipvs; net = skb_sknet(skb); + ipvs = net_ipvs(net); cmd = info->genlhdr->cmd; mutex_lock(&__ip_vs_mutex); @@ -3326,8 +3284,10 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) void *reply; int ret, cmd, reply_cmd; struct net *net; + struct netns_ipvs *ipvs; net = skb_sknet(skb); + ipvs = net_ipvs(net); cmd = info->genlhdr->cmd; if (cmd == IPVS_CMD_GET_SERVICE) @@ -3530,9 +3490,21 @@ int __net_init __ip_vs_control_init(struct net *net) { int idx; struct netns_ipvs *ipvs = net_ipvs(net); + struct ctl_table *tbl; if (!net_eq(net, &init_net)) /* netns not enabled yet */ return -EPERM; + + atomic_set(&ipvs->dropentry, 0); + spin_lock_init(&ipvs->dropentry_lock); + spin_lock_init(&ipvs->droppacket_lock); + spin_lock_init(&ipvs->securetcp_lock); + ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock); + + /* Initialize rs_table */ + for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) + INIT_LIST_HEAD(&ipvs->rs_table[idx]); + /* procfs stats */ ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL); if (ipvs->tot_stats == NULL) { @@ -3553,14 +3525,51 @@ int __net_init __ip_vs_control_init(struct net *net) proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); proc_net_fops_create(net, "ip_vs_stats_percpu", 0, &ip_vs_stats_percpu_fops); - sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path, + + if (!net_eq(net, &init_net)) { + tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL); + if (tbl == NULL) + goto err_dup; + } else + tbl = 
vs_vars; + /* Initialize sysctl defaults */ + idx = 0; + ipvs->sysctl_amemthresh = 1024; + tbl[idx++].data = &ipvs->sysctl_amemthresh; + ipvs->sysctl_am_droprate = 10; + tbl[idx++].data = &ipvs->sysctl_am_droprate; + tbl[idx++].data = &ipvs->sysctl_drop_entry; + tbl[idx++].data = &ipvs->sysctl_drop_packet; +#ifdef CONFIG_IP_VS_NFCT + tbl[idx++].data = &ipvs->sysctl_conntrack; +#endif + tbl[idx++].data = &ipvs->sysctl_secure_tcp; + ipvs->sysctl_snat_reroute = 1; + tbl[idx++].data = &ipvs->sysctl_snat_reroute; + ipvs->sysctl_sync_ver = 1; + tbl[idx++].data = &ipvs->sysctl_sync_ver; + tbl[idx++].data = &ipvs->sysctl_cache_bypass; + tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; + tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template; + ipvs->sysctl_sync_threshold[0] = 3; + ipvs->sysctl_sync_threshold[1] = 50; + tbl[idx].data = &ipvs->sysctl_sync_threshold; + tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold); + tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; + + + ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path, vs_vars); - if (sysctl_header == NULL) + if (ipvs->sysctl_hdr == NULL) goto err_reg; ip_vs_new_estimator(net, ipvs->tot_stats); + ipvs->sysctl_tbl = tbl; return 0; err_reg: + if (!net_eq(net, &init_net)) + kfree(tbl); +err_dup: free_percpu(ipvs->cpustats); err_alloc: kfree(ipvs->tot_stats); @@ -3575,7 +3584,7 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net) return; ip_vs_kill_estimator(net, ipvs->tot_stats); - unregister_net_sysctl_table(sysctl_header); + unregister_net_sysctl_table(ipvs->sysctl_hdr); proc_net_remove(net, "ip_vs_stats_percpu"); proc_net_remove(net, "ip_vs_stats"); proc_net_remove(net, "ip_vs"); diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 550365a..fb2d04a 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -34,7 +34,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 
&iph.daddr, sh->dest))) { int ignored; - if (ip_vs_todrop()) { + if (ip_vs_todrop(net_ipvs(net))) { /* * It seems that we are very loaded. * We have to drop this packet :( diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index d8b3f9f..c0cc341 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -54,7 +54,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, &iph.daddr, th->dest))) { int ignored; - if (ip_vs_todrop()) { + if (ip_vs_todrop(net_ipvs(net))) { /* * It seems that we are very loaded. * We have to drop this packet :( diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 581157b..f1282cb 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -50,7 +50,7 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (svc) { int ignored; - if (ip_vs_todrop()) { + if (ip_vs_todrop(net_ipvs(net))) { /* * It seems that we are very loaded. 
* We have to drop this packet :( diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index f85e47d..b178056 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -394,7 +394,7 @@ void ip_vs_sync_switch_mode(struct net *net, int mode) if (!ipvs->sync_state & IP_VS_STATE_MASTER) return; - if (mode == sysctl_ip_vs_sync_ver || !ipvs->sync_buff) + if (mode == ipvs->sysctl_sync_ver || !ipvs->sync_buff) return; spin_lock_bh(&ipvs->sync_buff_lock); @@ -521,7 +521,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp) unsigned int len, pe_name_len, pad; /* Handle old version of the protocol */ - if (sysctl_ip_vs_sync_ver == 0) { + if (ipvs->sysctl_sync_ver == 0) { ip_vs_sync_conn_v0(net, cp); return; } @@ -650,7 +650,7 @@ control: if (cp->flags & IP_VS_CONN_F_TEMPLATE) { int pkts = atomic_add_return(1, &cp->in_pkts); - if (pkts % sysctl_ip_vs_sync_threshold[1] != 1) + if (pkts % ipvs->sysctl_sync_threshold[1] != 1) return; } goto sloop; @@ -724,6 +724,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, { struct ip_vs_dest *dest; struct ip_vs_conn *cp; + struct netns_ipvs *ipvs = net_ipvs(net); if (!(flags & IP_VS_CONN_F_TEMPLATE)) cp = ip_vs_conn_in_get(param); @@ -794,7 +795,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, if (opt) memcpy(&cp->in_seq, opt, sizeof(*opt)); - atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]); + atomic_set(&cp->in_pkts, ipvs->sysctl_sync_threshold[0]); cp->state = state; cp->old_state = cp->state; /* -- cgit v1.1 From f6340ee0c6b9498ec918a7bb2f44e20abb8b2833 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:59 +0100 Subject: IPVS: netns, defense work timer. This patch makes defense work timer per name-space, A net ptr had to be added to the ipvs struct, since it's needed by defense_work_handler. 
[ horms@verge.net.au: Use cancel_delayed_work_sync() instead of cancel_rearming_delayed_work(). Found during merge conflict resolution ] Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_conn.c | 5 +++-- net/netfilter/ipvs/ip_vs_core.c | 1 + net/netfilter/ipvs/ip_vs_ctl.c | 20 +++++++++----------- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 5ba205a..28bdaf7 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1138,7 +1138,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp) } /* Called from keventd and must protect itself from softirqs */ -void ip_vs_random_dropentry(void) +void ip_vs_random_dropentry(struct net *net) { int idx; struct ip_vs_conn *cp; @@ -1158,7 +1158,8 @@ void ip_vs_random_dropentry(void) if (cp->flags & IP_VS_CONN_F_TEMPLATE) /* connection template */ continue; - + if (!ip_vs_conn_net_eq(cp, net)) + continue; if (cp->protocol == IPPROTO_TCP) { switch(cp->state) { case IP_VS_TCP_S_SYN_RECV: diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index a7c59a7..bdda346 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1884,6 +1884,7 @@ static int __net_init __ip_vs_init(struct net *net) pr_err("%s(): no memory.\n", __func__); return -ENOMEM; } + ipvs->net = net; /* Counters used for creating unique names */ ipvs->gen = atomic_read(&ipvs_netns_cnt); atomic_inc(&ipvs_netns_cnt); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 183ac18..6a963d4 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -217,18 +217,16 @@ static void update_defense_level(struct netns_ipvs *ipvs) * Timer for checking the defense */ #define DEFENSE_TIMER_PERIOD 1*HZ -static void defense_work_handler(struct work_struct *work); -static
DECLARE_DELAYED_WORK(defense_work, defense_work_handler); static void defense_work_handler(struct work_struct *work) { - struct netns_ipvs *ipvs = net_ipvs(&init_net); + struct netns_ipvs *ipvs = + container_of(work, struct netns_ipvs, defense_work.work); update_defense_level(ipvs); if (atomic_read(&ipvs->dropentry)) - ip_vs_random_dropentry(); - - schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); + ip_vs_random_dropentry(ipvs->net); + schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); } int @@ -3564,6 +3562,9 @@ int __net_init __ip_vs_control_init(struct net *net) goto err_reg; ip_vs_new_estimator(net, ipvs->tot_stats); ipvs->sysctl_tbl = tbl; + /* Schedule defense work */ + INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); + schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); return 0; err_reg: @@ -3588,6 +3589,8 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net) proc_net_remove(net, "ip_vs_stats_percpu"); proc_net_remove(net, "ip_vs_stats"); proc_net_remove(net, "ip_vs"); + cancel_delayed_work_sync(&ipvs->defense_work); + cancel_work_sync(&ipvs->defense_work.work); free_percpu(ipvs->cpustats); kfree(ipvs->tot_stats); } @@ -3631,9 +3634,6 @@ int __init ip_vs_control_init(void) goto err_net; } - /* Hook the defense timer */ - schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); - LeaveFunction(2); return 0; @@ -3648,8 +3648,6 @@ void ip_vs_control_cleanup(void) { EnterFunction(2); ip_vs_trash_cleanup(); - cancel_delayed_work_sync(&defense_work); - cancel_work_sync(&defense_work.work); unregister_pernet_subsys(&ipvs_control_ops); ip_vs_genl_unregister(); nf_unregister_sockopt(&ip_vs_sockopts); -- cgit v1.1 From f2431e6e9255461eb1476340a89ad32ad4b38b03 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:45:00 +0100 Subject: IPVS: netns, trash handling trash list per namespace, and reordering of some params in dst struct.
[ horms@verge.net.au: Use cancel_delayed_work_sync() instead of cancel_rearming_delayed_work(). Found during merge conflict resolution ] Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ctl.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 6a963d4..442edf4 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -255,11 +255,6 @@ static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; /* - * Trash for destinations - */ -static LIST_HEAD(ip_vs_dest_trash); - -/* * FTP & NULL virtual service counters */ static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0); @@ -650,11 +645,12 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, __be16 dport) { struct ip_vs_dest *dest, *nxt; + struct netns_ipvs *ipvs = net_ipvs(svc->net); /* * Find the destination in trash */ - list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { + list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " "dest->refcnt=%d\n", dest->vfwmark, @@ -703,11 +699,12 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, * are expired, and the refcnt of each destination in the trash must * be 1, so we simply release them here.
*/ -static void ip_vs_trash_cleanup(void) +static void ip_vs_trash_cleanup(struct net *net) { struct ip_vs_dest *dest, *nxt; + struct netns_ipvs *ipvs = net_ipvs(net); - list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { + list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { list_del(&dest->n_list); ip_vs_dst_reset(dest); __ip_vs_unbind_svc(dest); @@ -1021,7 +1018,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->refcnt)); - list_add(&dest->n_list, &ip_vs_dest_trash); + list_add(&dest->n_list, &ipvs->dest_trash); atomic_inc(&dest->refcnt); } } @@ -3503,6 +3500,8 @@ int __net_init __ip_vs_control_init(struct net *net) for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) INIT_LIST_HEAD(&ipvs->rs_table[idx]); + INIT_LIST_HEAD(&ipvs->dest_trash); + /* procfs stats */ ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL); if (ipvs->tot_stats == NULL) { @@ -3584,13 +3583,14 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net) if (!net_eq(net, &init_net)) /* netns not enabled yet */ return; + ip_vs_trash_cleanup(net); ip_vs_kill_estimator(net, ipvs->tot_stats); + cancel_delayed_work_sync(&ipvs->defense_work); + cancel_work_sync(&ipvs->defense_work.work); unregister_net_sysctl_table(ipvs->sysctl_hdr); proc_net_remove(net, "ip_vs_stats_percpu"); proc_net_remove(net, "ip_vs_stats"); proc_net_remove(net, "ip_vs"); - cancel_delayed_work_sync(&ipvs->defense_work); - cancel_work_sync(&ipvs->defense_work.work); free_percpu(ipvs->cpustats); kfree(ipvs->tot_stats); } @@ -3647,7 +3647,6 @@ err: void ip_vs_control_cleanup(void) { EnterFunction(2); - ip_vs_trash_cleanup(); unregister_pernet_subsys(&ipvs_control_ops); ip_vs_genl_unregister(); nf_unregister_sockopt(&ip_vs_sockopts); -- cgit v1.1 From 763f8d0ed4f1ce38b35cc0e05482b7799b82789b Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:45:01 +0100 Subject: IPVS: 
netns, svc counters moved in ip_vs_ctl.c Last two global vars to be moved, ip_vs_ftpsvc_counter and ip_vs_nullsvc_counter. [horms@verge.net.au: removed whitespace-change-only hunk] Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ctl.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 442edf4..65f5de4 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -254,12 +254,6 @@ static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; /* the service table hashed by fwmark */ static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; -/* - * FTP & NULL virtual service counters - */ -static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0); -static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0); - /* * Returns hash value for virtual service @@ -409,6 +403,7 @@ ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, const union nf_inet_addr *vaddr, __be16 vport) { struct ip_vs_service *svc; + struct netns_ipvs *ipvs = net_ipvs(net); read_lock(&__ip_vs_svc_lock); @@ -427,7 +422,7 @@ ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, if (svc == NULL && protocol == IPPROTO_TCP - && atomic_read(&ip_vs_ftpsvc_counter) + && atomic_read(&ipvs->ftpsvc_counter) && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) { /* * Check if ftp service entry exists, the packet @@ -437,7 +432,7 @@ ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, } if (svc == NULL - && atomic_read(&ip_vs_nullsvc_counter)) { + && atomic_read(&ipvs->nullsvc_counter)) { /* * Check if the catch-all port (port zero) exists */ @@ -1173,9 +1168,9 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, /* Update the virtual service counters */ if (svc->port == FTPPORT) - atomic_inc(&ip_vs_ftpsvc_counter); +
atomic_inc(&ipvs->ftpsvc_counter); else if (svc->port == 0) - atomic_inc(&ip_vs_nullsvc_counter); + atomic_inc(&ipvs->nullsvc_counter); ip_vs_new_estimator(net, &svc->stats); @@ -1359,9 +1354,9 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) * Update the virtual service counters */ if (svc->port == FTPPORT) - atomic_dec(&ip_vs_ftpsvc_counter); + atomic_dec(&ipvs->ftpsvc_counter); else if (svc->port == 0) - atomic_dec(&ip_vs_nullsvc_counter); + atomic_dec(&ipvs->nullsvc_counter); /* * Free the service if nobody refers to it @@ -3501,6 +3496,8 @@ int __net_init __ip_vs_control_init(struct net *net) INIT_LIST_HEAD(&ipvs->rs_table[idx]); INIT_LIST_HEAD(&ipvs->dest_trash); + atomic_set(&ipvs->ftpsvc_counter, 0); + atomic_set(&ipvs->nullsvc_counter, 0); /* procfs stats */ ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL); -- cgit v1.1 From 4a98480bccc2f5998c5564d254392635b9aa04c2 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:45:02 +0100 Subject: IPVS: netns, misc init_net removal in core. init_net removed in __ip_vs_addr_is_local_v6, and got net as param. 
Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 6 ++++-- net/netfilter/ipvs/ip_vs_ctl.c | 9 +++++---- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index bdda346..9e10c7a 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -499,6 +499,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_proto_data *pd) { + struct net *net; struct netns_ipvs *ipvs; __be16 _ports[2], *pptr; struct ip_vs_iphdr iph; @@ -511,18 +512,19 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, ip_vs_service_put(svc); return NF_DROP; } + net = skb_net(skb); #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST; else #endif - unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST); + unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST); /* if it is fwmark-based service, the cache_bypass sysctl is up and the destination is a non-local unicast, then create a cache_bypass connection entry */ - ipvs = net_ipvs(skb_net(skb)); + ipvs = net_ipvs(net); if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) { int ret, cs; struct ip_vs_conn *cp; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 65f5de4..edf2b6d 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -71,7 +71,8 @@ int ip_vs_get_debug_level(void) #ifdef CONFIG_IP_VS_IPV6 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? 
*/ -static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr) +static int __ip_vs_addr_is_local_v6(struct net *net, + const struct in6_addr *addr) { struct rt6_info *rt; struct flowi fl = { @@ -80,7 +81,7 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr) .fl6_src = { .s6_addr32 = {0, 0, 0, 0} }, }; - rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); + rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl); if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK)) return 1; @@ -810,12 +811,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, atype = ipv6_addr_type(&udest->addr.in6); if ((!(atype & IPV6_ADDR_UNICAST) || atype & IPV6_ADDR_LINKLOCAL) && - !__ip_vs_addr_is_local_v6(&udest->addr.in6)) + !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6)) return -EINVAL; } else #endif { - atype = inet_addr_type(&init_net, udest->addr.ip); + atype = inet_addr_type(svc->net, udest->addr.ip); if (atype != RTN_LOCAL && atype != RTN_UNICAST) return -EINVAL; } -- cgit v1.1 From c6d2d445d8dee04cde47eb4021636399a4239e9f Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:45:03 +0100 Subject: IPVS: netns, final patch enabling network name space. 
all init_net removed, (except for some alloc related that needs to be there) Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_app.c | 3 --- net/netfilter/ipvs/ip_vs_conn.c | 5 ----- net/netfilter/ipvs/ip_vs_core.c | 4 ---- net/netfilter/ipvs/ip_vs_ctl.c | 7 +------ net/netfilter/ipvs/ip_vs_est.c | 3 --- net/netfilter/ipvs/ip_vs_ftp.c | 6 ------ net/netfilter/ipvs/ip_vs_sync.c | 5 ----- 7 files changed, 1 insertion(+), 32 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 286f465..5c48ffb 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -582,9 +582,6 @@ static int __net_init __ip_vs_app_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); - if (!net_eq(net, &init_net)) /* netns not enabled yet */ - return -EPERM; - INIT_LIST_HEAD(&ipvs->app_list); __mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key); proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops); diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 28bdaf7..83233fe 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1234,8 +1234,6 @@ int __net_init __ip_vs_conn_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); - if (!net_eq(net, &init_net)) /* netns not enabled yet */ - return -EPERM; atomic_set(&ipvs->conn_count, 0); proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops); @@ -1245,9 +1243,6 @@ int __net_init __ip_vs_conn_init(struct net *net) static void __net_exit __ip_vs_conn_cleanup(struct net *net) { - if (!net_eq(net, &init_net)) /* netns not enabled yet */ - return; - /* flush all the connection entries first */ ip_vs_conn_flush(net); proc_net_remove(net, "ip_vs_conn"); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 9e10c7a..f36a84f 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ 
b/net/netfilter/ipvs/ip_vs_core.c @@ -1877,10 +1877,6 @@ static int __net_init __ip_vs_init(struct net *net) { struct netns_ipvs *ipvs; - if (!net_eq(net, &init_net)) { - pr_err("The final patch for enabling netns is missing\n"); - return -EPERM; - } ipvs = net_generic(net, ip_vs_net_id); if (ipvs == NULL) { pr_err("%s(): no memory.\n", __func__); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index edf2b6d..09ca2ce 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2617,6 +2617,7 @@ static struct genl_family ip_vs_genl_family = { .name = IPVS_GENL_NAME, .version = IPVS_GENL_VERSION, .maxattr = IPVS_CMD_MAX, + .netnsok = true, /* Make ipvsadm to work on netns */ }; /* Policy used for first-level command attributes */ @@ -3483,9 +3484,6 @@ int __net_init __ip_vs_control_init(struct net *net) struct netns_ipvs *ipvs = net_ipvs(net); struct ctl_table *tbl; - if (!net_eq(net, &init_net)) /* netns not enabled yet */ - return -EPERM; - atomic_set(&ipvs->dropentry, 0); spin_lock_init(&ipvs->dropentry_lock); spin_lock_init(&ipvs->droppacket_lock); @@ -3578,9 +3576,6 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); - if (!net_eq(net, &init_net)) /* netns not enabled yet */ - return; - ip_vs_trash_cleanup(net); ip_vs_kill_estimator(net, ipvs->tot_stats); cancel_delayed_work_sync(&ipvs->defense_work); diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index d13616b..f560a05 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -203,9 +203,6 @@ static int __net_init __ip_vs_estimator_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); - if (!net_eq(net, &init_net)) /* netns not enabled yet */ - return -EPERM; - INIT_LIST_HEAD(&ipvs->est_list); spin_lock_init(&ipvs->est_lock); setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net); diff --git a/net/netfilter/ipvs/ip_vs_ftp.c 
b/net/netfilter/ipvs/ip_vs_ftp.c index 6a04f9a..6b5dd6d 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -413,9 +413,6 @@ static int __net_init __ip_vs_ftp_init(struct net *net) int i, ret; struct ip_vs_app *app = &ip_vs_ftp; - if (!net_eq(net, &init_net)) /* netns not enabled yet */ - return -EPERM; - ret = register_ip_vs_app(net, app); if (ret) return ret; @@ -442,9 +439,6 @@ static void __ip_vs_ftp_exit(struct net *net) { struct ip_vs_app *app = &ip_vs_ftp; - if (!net_eq(net, &init_net)) /* netns not enabled yet */ - return; - unregister_ip_vs_app(net, app); } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index b178056..d1adf98 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1659,9 +1659,6 @@ static int __net_init __ip_vs_sync_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); - if (!net_eq(net, &init_net)) /* netns not enabled yet */ - return -EPERM; - INIT_LIST_HEAD(&ipvs->sync_queue); spin_lock_init(&ipvs->sync_lock); spin_lock_init(&ipvs->sync_buff_lock); @@ -1674,8 +1671,6 @@ static int __net_init __ip_vs_sync_init(struct net *net) static void __ip_vs_sync_cleanup(struct net *net) { - if (!net_eq(net, &init_net)) /* netns not enabled yet */ - return; stop_sync_thread(net, IP_VS_STATE_MASTER); stop_sync_thread(net, IP_VS_STATE_BACKUP); } -- cgit v1.1 From b017900aac4a158b9bf7ffdcb8a369a91115b3e4 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 15 Dec 2010 09:46:26 +0100 Subject: netfilter: xt_conntrack: support matching on port ranges Add a new revision 3 that contains port ranges for all of origsrc, origdst, replsrc and repldst. The high ports are appended to the original v2 data structure to allow sharing most of the code with v1 and v2. Use of the revision specific port matching function is made dependent on par->match->revision.
Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_conntrack.c | 75 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 73 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index e536710..4ef1b63 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -112,6 +112,54 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info, return true; } +static inline bool +port_match(u16 min, u16 max, u16 port, bool invert) +{ + return (port >= min && port <= max) ^ invert; +} + +static inline bool +ct_proto_port_check_v3(const struct xt_conntrack_mtinfo3 *info, + const struct nf_conn *ct) +{ + const struct nf_conntrack_tuple *tuple; + + tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + if ((info->match_flags & XT_CONNTRACK_PROTO) && + (nf_ct_protonum(ct) == info->l4proto) ^ + !(info->invert_flags & XT_CONNTRACK_PROTO)) + return false; + + /* Shortcut to match all recognized protocols by using ->src.all. 
*/ + if ((info->match_flags & XT_CONNTRACK_ORIGSRC_PORT) && + !port_match(info->origsrc_port, info->origsrc_port_high, + ntohs(tuple->src.u.all), + info->invert_flags & XT_CONNTRACK_ORIGSRC_PORT)) + return false; + + if ((info->match_flags & XT_CONNTRACK_ORIGDST_PORT) && + !port_match(info->origdst_port, info->origdst_port_high, + ntohs(tuple->dst.u.all), + info->invert_flags & XT_CONNTRACK_ORIGDST_PORT)) + return false; + + tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; + + if ((info->match_flags & XT_CONNTRACK_REPLSRC_PORT) && + !port_match(info->replsrc_port, info->replsrc_port_high, + ntohs(tuple->src.u.all), + info->invert_flags & XT_CONNTRACK_REPLSRC_PORT)) + return false; + + if ((info->match_flags & XT_CONNTRACK_REPLDST_PORT) && + !port_match(info->repldst_port, info->repldst_port_high, + ntohs(tuple->dst.u.all), + info->invert_flags & XT_CONNTRACK_REPLDST_PORT)) + return false; + + return true; +} + static bool conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, u16 state_mask, u16 status_mask) @@ -170,8 +218,13 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, !(info->invert_flags & XT_CONNTRACK_REPLDST)) return false; - if (!ct_proto_port_check(info, ct)) - return false; + if (par->match->revision != 3) { + if (!ct_proto_port_check(info, ct)) + return false; + } else { + if (!ct_proto_port_check_v3(par->matchinfo, ct)) + return false; + } if ((info->match_flags & XT_CONNTRACK_STATUS) && (!!(status_mask & ct->status) ^ @@ -207,6 +260,14 @@ conntrack_mt_v2(const struct sk_buff *skb, struct xt_action_param *par) return conntrack_mt(skb, par, info->state_mask, info->status_mask); } +static bool +conntrack_mt_v3(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_conntrack_mtinfo3 *info = par->matchinfo; + + return conntrack_mt(skb, par, info->state_mask, info->status_mask); +} + static int conntrack_mt_check(const struct xt_mtchk_param *par) { int ret; @@ -244,6 +305,16 @@ static struct xt_match 
conntrack_mt_reg[] __read_mostly = { .destroy = conntrack_mt_destroy, .me = THIS_MODULE, }, + { + .name = "conntrack", + .revision = 3, + .family = NFPROTO_UNSPEC, + .matchsize = sizeof(struct xt_conntrack_mtinfo3), + .match = conntrack_mt_v3, + .checkentry = conntrack_mt_check, + .destroy = conntrack_mt_destroy, + .me = THIS_MODULE, + }, }; static int __init conntrack_mt_init(void) -- cgit v1.1 From 255d0dc34068a976550ce555e153c0bfcfec7cc6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 18 Dec 2010 18:35:15 +0100 Subject: netfilter: x_table: speedup compat operations One iptables invocation with 135000 rules takes 35 seconds of cpu time on a recent server, using a 32bit distro and a 64bit kernel. We eventually trigger NMI/RCU watchdog. INFO: rcu_sched_state detected stall on CPU 3 (t=6000 jiffies) COMPAT mode has quadratic behavior and consumes 16 bytes of memory per rule. Switch the xt_compat algos to use an array instead of list, and use a binary search to locate an offset in the sorted array. This halves memory need (8 bytes per rule), and removes quadratic behavior [ O(N*N) -> O(N*log2(N)) ] Time of iptables goes from 35 s to 150 ms.
Signed-off-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 1 + net/ipv4/netfilter/arp_tables.c | 2 + net/ipv4/netfilter/ip_tables.c | 2 + net/ipv6/netfilter/ip6_tables.c | 2 + net/netfilter/x_tables.c | 82 ++++++++++++++++++++++++----------------- 5 files changed, 55 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 16df053..5f1825d 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1764,6 +1764,7 @@ static int compat_table_info(const struct ebt_table_info *info, newinfo->entries_size = size; + xt_compat_init_offsets(AF_INET, info->nentries); return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, entries, newinfo); } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 3fac340..47e5178 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -883,6 +883,7 @@ static int compat_table_info(const struct xt_table_info *info, memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; loc_cpu_entry = info->entries[raw_smp_processor_id()]; + xt_compat_init_offsets(NFPROTO_ARP, info->number); xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); if (ret != 0) @@ -1350,6 +1351,7 @@ static int translate_compat_table(const char *name, duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(NFPROTO_ARP); + xt_compat_init_offsets(NFPROTO_ARP, number); /* Walk through entries, checking offsets. 
*/ xt_entry_foreach(iter0, entry0, total_size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index a846d63..c5a75d7 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1080,6 +1080,7 @@ static int compat_table_info(const struct xt_table_info *info, memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; loc_cpu_entry = info->entries[raw_smp_processor_id()]; + xt_compat_init_offsets(AF_INET, info->number); xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); if (ret != 0) @@ -1681,6 +1682,7 @@ translate_compat_table(struct net *net, duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(AF_INET); + xt_compat_init_offsets(AF_INET, number); /* Walk through entries, checking offsets. */ xt_entry_foreach(iter0, entry0, total_size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 4555823..0c9973a 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1093,6 +1093,7 @@ static int compat_table_info(const struct xt_table_info *info, memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; loc_cpu_entry = info->entries[raw_smp_processor_id()]; + xt_compat_init_offsets(AF_INET6, info->number); xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); if (ret != 0) @@ -1696,6 +1697,7 @@ translate_compat_table(struct net *net, duprintf("translate_compat_table: size %u\n", info->size); j = 0; xt_compat_lock(AF_INET6); + xt_compat_init_offsets(AF_INET6, number); /* Walk through entries, checking offsets. 
*/ xt_entry_foreach(iter0, entry0, total_size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 8046350..ee5de3a 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -38,9 +38,8 @@ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module"); #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) struct compat_delta { - struct compat_delta *next; - unsigned int offset; - int delta; + unsigned int offset; /* offset in kernel */ + int delta; /* delta in 32bit user land */ }; struct xt_af { @@ -49,7 +48,9 @@ struct xt_af { struct list_head target; #ifdef CONFIG_COMPAT struct mutex compat_mutex; - struct compat_delta *compat_offsets; + struct compat_delta *compat_tab; + unsigned int number; /* number of slots in compat_tab[] */ + unsigned int cur; /* number of used slots in compat_tab[] */ #endif }; @@ -414,54 +415,67 @@ int xt_check_match(struct xt_mtchk_param *par, EXPORT_SYMBOL_GPL(xt_check_match); #ifdef CONFIG_COMPAT -int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta) +int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta) { - struct compat_delta *tmp; + struct xt_af *xp = &xt[af]; - tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL); - if (!tmp) - return -ENOMEM; + if (!xp->compat_tab) { + if (!xp->number) + return -EINVAL; + xp->compat_tab = vmalloc(sizeof(struct compat_delta) * xp->number); + if (!xp->compat_tab) + return -ENOMEM; + xp->cur = 0; + } - tmp->offset = offset; - tmp->delta = delta; + if (xp->cur >= xp->number) + return -EINVAL; - if (xt[af].compat_offsets) { - tmp->next = xt[af].compat_offsets->next; - xt[af].compat_offsets->next = tmp; - } else { - xt[af].compat_offsets = tmp; - tmp->next = NULL; - } + if (xp->cur) + delta += xp->compat_tab[xp->cur - 1].delta; + xp->compat_tab[xp->cur].offset = offset; + xp->compat_tab[xp->cur].delta = delta; + xp->cur++; return 0; } 
EXPORT_SYMBOL_GPL(xt_compat_add_offset); void xt_compat_flush_offsets(u_int8_t af) { - struct compat_delta *tmp, *next; - - if (xt[af].compat_offsets) { - for (tmp = xt[af].compat_offsets; tmp; tmp = next) { - next = tmp->next; - kfree(tmp); - } - xt[af].compat_offsets = NULL; + if (xt[af].compat_tab) { + vfree(xt[af].compat_tab); + xt[af].compat_tab = NULL; + xt[af].number = 0; } } EXPORT_SYMBOL_GPL(xt_compat_flush_offsets); int xt_compat_calc_jump(u_int8_t af, unsigned int offset) { - struct compat_delta *tmp; - int delta; - - for (tmp = xt[af].compat_offsets, delta = 0; tmp; tmp = tmp->next) - if (tmp->offset < offset) - delta += tmp->delta; - return delta; + struct compat_delta *tmp = xt[af].compat_tab; + int mid, left = 0, right = xt[af].cur - 1; + + while (left <= right) { + mid = (left + right) >> 1; + if (offset > tmp[mid].offset) + left = mid + 1; + else if (offset < tmp[mid].offset) + right = mid - 1; + else + return mid ? tmp[mid - 1].delta : 0; + } + WARN_ON_ONCE(1); + return 0; } EXPORT_SYMBOL_GPL(xt_compat_calc_jump); +void xt_compat_init_offsets(u_int8_t af, unsigned int number) +{ + xt[af].number = number; + xt[af].cur = 0; +} +EXPORT_SYMBOL(xt_compat_init_offsets); + int xt_compat_match_offset(const struct xt_match *match) { u_int16_t csize = match->compatsize ? : match->matchsize; @@ -1337,7 +1351,7 @@ static int __init xt_init(void) mutex_init(&xt[i].mutex); #ifdef CONFIG_COMPAT mutex_init(&xt[i].compat_mutex); - xt[i].compat_offsets = NULL; + xt[i].compat_tab = NULL; #endif INIT_LIST_HEAD(&xt[i].target); INIT_LIST_HEAD(&xt[i].match); -- cgit v1.1 From 6faee60a4e82075853a437831768cc9e2e563e4e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 20 Dec 2010 15:57:47 +0100 Subject: netfilter: ebt_ip6: allow matching on ipv6-icmp types/codes To avoid adding a new match revision icmp type/code are stored in the sport/dport area. 
Signed-off-by: Florian Westphal Reviewed-by: Holger Eitzenberger Reviewed-by: Bart De Schuymer Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_ip6.c | 46 +++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index 50a46af..2ed0056 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c @@ -22,9 +22,15 @@ #include #include -struct tcpudphdr { - __be16 src; - __be16 dst; +union pkthdr { + struct { + __be16 src; + __be16 dst; + } tcpudphdr; + struct { + u8 type; + u8 code; + } icmphdr; }; static bool @@ -33,8 +39,8 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct ebt_ip6_info *info = par->matchinfo; const struct ipv6hdr *ih6; struct ipv6hdr _ip6h; - const struct tcpudphdr *pptr; - struct tcpudphdr _ports; + const union pkthdr *pptr; + union pkthdr _pkthdr; ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h); if (ih6 == NULL) @@ -56,26 +62,34 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) return false; if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO)) return false; - if (!(info->bitmask & EBT_IP6_DPORT) && - !(info->bitmask & EBT_IP6_SPORT)) + if (!(info->bitmask & ( EBT_IP6_DPORT | + EBT_IP6_SPORT | EBT_IP6_ICMP6))) return true; - pptr = skb_header_pointer(skb, offset_ph, sizeof(_ports), - &_ports); + + /* min icmpv6 headersize is 4, so sizeof(_pkthdr) is ok. 
*/ + pptr = skb_header_pointer(skb, offset_ph, sizeof(_pkthdr), + &_pkthdr); if (pptr == NULL) return false; if (info->bitmask & EBT_IP6_DPORT) { - u32 dst = ntohs(pptr->dst); + u16 dst = ntohs(pptr->tcpudphdr.dst); if (FWINV(dst < info->dport[0] || dst > info->dport[1], EBT_IP6_DPORT)) return false; } if (info->bitmask & EBT_IP6_SPORT) { - u32 src = ntohs(pptr->src); + u16 src = ntohs(pptr->tcpudphdr.src); if (FWINV(src < info->sport[0] || src > info->sport[1], EBT_IP6_SPORT)) return false; } - return true; + if ((info->bitmask & EBT_IP6_ICMP6) && + FWINV(pptr->icmphdr.type < info->icmpv6_type[0] || + pptr->icmphdr.type > info->icmpv6_type[1] || + pptr->icmphdr.code < info->icmpv6_code[0] || + pptr->icmphdr.code > info->icmpv6_code[1], + EBT_IP6_ICMP6)) + return false; } return true; } @@ -103,6 +117,14 @@ static int ebt_ip6_mt_check(const struct xt_mtchk_param *par) return -EINVAL; if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1]) return -EINVAL; + if (info->bitmask & EBT_IP6_ICMP6) { + if ((info->invflags & EBT_IP6_PROTO) || + info->protocol != IPPROTO_ICMPV6) + return -EINVAL; + if (info->icmpv6_type[0] > info->icmpv6_type[1] || + info->icmpv6_code[0] > info->icmpv6_code[1]) + return -EINVAL; + } return 0; } -- cgit v1.1 From c7066f70d9610df0b9406cc635fc09e86136e714 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 14 Jan 2011 13:36:42 +0100 Subject: netfilter: fix Kconfig dependencies Fix dependencies of netfilter realm match: it depends on NET_CLS_ROUTE, which itself depends on NET_SCHED; this dependency is missing from netfilter. Since matching on realms is also useful without having NET_SCHED enabled and the option really only controls whether the tclassid member is included in route and dst entries, rename the config option to IP_ROUTE_CLASSID and move it outside of traffic scheduling context to get rid of the NET_SCHED dependency.
Reported-by: Vladis Kletnieks Signed-off-by: Patrick McHardy --- net/ipv4/Kconfig | 4 +++- net/ipv4/fib_rules.c | 10 +++++----- net/ipv4/fib_semantics.c | 14 +++++++------- net/ipv4/ip_input.c | 2 +- net/ipv4/route.c | 26 +++++++++++++------------- net/netfilter/Kconfig | 2 +- net/sched/Kconfig | 5 +---- net/sched/cls_flow.c | 2 +- net/sched/em_meta.c | 2 +- 9 files changed, 33 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 9e95d7f..dcb2e18 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -140,6 +140,9 @@ config IP_ROUTE_VERBOSE handled by the klogd daemon which is responsible for kernel messages ("man klogd"). +config IP_ROUTE_CLASSID + bool + config IP_PNP bool "IP: kernel level autoconfiguration" help @@ -655,4 +658,3 @@ config TCP_MD5SIG on the Internet. If unsure, say N. - diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 7981a24..9cefe72 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -41,12 +41,12 @@ struct fib4_rule { __be32 srcmask; __be32 dst; __be32 dstmask; -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID u32 tclassid; #endif }; -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID u32 fib_rules_tclass(struct fib_result *res) { return res->r ? 
((struct fib4_rule *) res->r)->tclassid : 0; @@ -165,7 +165,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, if (frh->dst_len) rule4->dst = nla_get_be32(tb[FRA_DST]); -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID if (tb[FRA_FLOW]) rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); #endif @@ -195,7 +195,7 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, if (frh->tos && (rule4->tos != frh->tos)) return 0; -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW]))) return 0; #endif @@ -224,7 +224,7 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, if (rule4->src_len) NLA_PUT_BE32(skb, FRA_SRC, rule4->src); -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID if (rule4->tclassid) NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid); #endif diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 3e0da3e..a72c62d 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -200,7 +200,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) #ifdef CONFIG_IP_ROUTE_MULTIPATH nh->nh_weight != onh->nh_weight || #endif -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID nh->nh_tclassid != onh->nh_tclassid || #endif ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD)) @@ -422,7 +422,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, nla = nla_find(attrs, attrlen, RTA_GATEWAY); nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0; -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID nla = nla_find(attrs, attrlen, RTA_FLOW); nexthop_nh->nh_tclassid = nla ? 
nla_get_u32(nla) : 0; #endif @@ -476,7 +476,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) nla = nla_find(attrs, attrlen, RTA_GATEWAY); if (nla && nla_get_be32(nla) != nh->nh_gw) return 1; -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID nla = nla_find(attrs, attrlen, RTA_FLOW); if (nla && nla_get_u32(nla) != nh->nh_tclassid) return 1; @@ -783,7 +783,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) goto err_inval; if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) goto err_inval; -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) goto err_inval; #endif @@ -796,7 +796,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) nh->nh_oif = cfg->fc_oif; nh->nh_gw = cfg->fc_gw; nh->nh_flags = cfg->fc_flags; -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID nh->nh_tclassid = cfg->fc_flow; #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -1006,7 +1006,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, if (fi->fib_nh->nh_oif) NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID if (fi->fib_nh[0].nh_tclassid) NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); #endif @@ -1031,7 +1031,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, if (nh->nh_gw) NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID if (nh->nh_tclassid) NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); #endif diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index d859bcc..d7b2b09 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -340,7 +340,7 @@ static int ip_rcv_finish(struct sk_buff *skb) } } -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID if (unlikely(skb_dst(skb)->tclassid)) { struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); u32 idx = skb_dst(skb)->tclassid; diff --git a/net/ipv4/route.c 
b/net/ipv4/route.c index 66610ea..f70ae1b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -511,7 +511,7 @@ static const struct file_operations rt_cpu_seq_fops = { .release = seq_release, }; -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID static int rt_acct_proc_show(struct seq_file *m, void *v) { struct ip_rt_acct *dst, *src; @@ -564,14 +564,14 @@ static int __net_init ip_rt_do_proc_init(struct net *net) if (!pde) goto err2; -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops); if (!pde) goto err3; #endif return 0; -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID err3: remove_proc_entry("rt_cache", net->proc_net_stat); #endif @@ -585,7 +585,7 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net) { remove_proc_entry("rt_cache", net->proc_net_stat); remove_proc_entry("rt_cache", net->proc_net); -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID remove_proc_entry("rt_acct", net->proc_net); #endif } @@ -1784,7 +1784,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) memcpy(addr, &src, 4); } -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID static void set_class_tag(struct rtable *rt, u32 tag) { if (!(rt->dst.tclassid & 0xFFFF)) @@ -1811,7 +1811,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) rt->dst.dev->mtu > 576) rt->dst.metrics[RTAX_MTU-1] = 576; } -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid; #endif } else @@ -1827,7 +1827,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) if (dst_metric(&rt->dst, RTAX_ADVMSS) > 65535 - 40) rt->dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID #ifdef CONFIG_IP_MULTIPLE_TABLES set_class_tag(rt, fib_rules_tclass(res)); #endif @@ -1883,7 +1883,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, 
__be32 saddr, rth->fl.mark = skb->mark; rth->fl.fl4_src = saddr; rth->rt_src = saddr; -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; #endif rth->rt_iif = @@ -2202,7 +2202,7 @@ local_input: rth->fl.mark = skb->mark; rth->fl.fl4_src = saddr; rth->rt_src = saddr; -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; #endif rth->rt_iif = @@ -2820,7 +2820,7 @@ static int rt_fill_info(struct net *net, } if (rt->dst.dev) NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID if (rt->dst.tclassid) NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); #endif @@ -3245,9 +3245,9 @@ static __net_initdata struct pernet_operations rt_genid_ops = { }; -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; -#endif /* CONFIG_NET_CLS_ROUTE */ +#endif /* CONFIG_IP_ROUTE_CLASSID */ static __initdata unsigned long rhash_entries; static int __init set_rhash_entries(char *str) @@ -3263,7 +3263,7 @@ int __init ip_rt_init(void) { int rc = 0; -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); if (!ip_rt_acct) panic("IP: failed to allocate ip_rt_acct\n"); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 1534f2b..1b79353 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -886,7 +886,7 @@ config NETFILTER_XT_MATCH_RATEEST config NETFILTER_XT_MATCH_REALM tristate '"realm" match support' depends on NETFILTER_ADVANCED - select NET_CLS_ROUTE + select IP_ROUTE_CLASSID help This option adds a `realm' match, which allows you to use the realm key from the routing subsystem inside iptables. 
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index a36270a..4b753ef 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -243,7 +243,7 @@ config NET_CLS_TCINDEX config NET_CLS_ROUTE4 tristate "Routing decision (ROUTE)" - select NET_CLS_ROUTE + select IP_ROUTE_CLASSID select NET_CLS ---help--- If you say Y here, you will be able to classify packets @@ -252,9 +252,6 @@ config NET_CLS_ROUTE4 To compile this code as a module, choose M here: the module will be called cls_route. -config NET_CLS_ROUTE - bool - config NET_CLS_FW tristate "Netfilter mark (FW)" select NET_CLS diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 5b271a1..a3b293d 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -276,7 +276,7 @@ fallback: static u32 flow_get_rtclassid(const struct sk_buff *skb) { -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID if (skb_dst(skb)) return skb_dst(skb)->tclassid; #endif diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 34da5e2..0d66e58 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -255,7 +255,7 @@ META_COLLECTOR(int_rtclassid) if (unlikely(skb_dst(skb) == NULL)) *err = -1; else -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID dst->value = skb_dst(skb)->tclassid; #else dst->value = 0; -- cgit v1.1 From d862a6622e9db508d4b28cc7c5bc28bd548cc24e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 14 Jan 2011 15:45:56 +0100 Subject: netfilter: nf_conntrack: use is_vmalloc_addr() Use is_vmalloc_addr() in nf_ct_free_hashtable() and get rid of the vmalloc flags to indicate that a hash table has been allocated using vmalloc(). 
Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_nat_core.c | 6 ++---- net/netfilter/nf_conntrack_core.c | 26 +++++++++----------------- net/netfilter/nf_conntrack_expect.c | 9 +++------ net/netfilter/nf_conntrack_helper.c | 10 +++------- 4 files changed, 17 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index eb55835..6972cee 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -682,8 +682,7 @@ static int __net_init nf_nat_net_init(struct net *net) { /* Leave them the same for the moment. */ net->ipv4.nat_htable_size = net->ct.htable_size; - net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, - &net->ipv4.nat_vmalloced, 0); + net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0); if (!net->ipv4.nat_bysource) return -ENOMEM; return 0; @@ -705,8 +704,7 @@ static void __net_exit nf_nat_net_exit(struct net *net) { nf_ct_iterate_cleanup(net, &clean_nat, NULL); synchronize_rcu(); - nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, - net->ipv4.nat_htable_size); + nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size); } static struct pernet_operations nf_nat_net_ops = { diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index e95ac42..dc2ff2c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1202,9 +1202,9 @@ static int kill_all(struct nf_conn *i, void *data) return 1; } -void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size) +void nf_ct_free_hashtable(void *hash, unsigned int size) { - if (vmalloced) + if (is_vmalloc_addr(hash)) vfree(hash); else free_pages((unsigned long)hash, @@ -1271,8 +1271,7 @@ static void nf_conntrack_cleanup_net(struct net *net) goto i_see_dead_people; } - nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, - net->ct.htable_size); + 
nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); nf_conntrack_ecache_fini(net); nf_conntrack_acct_fini(net); nf_conntrack_expect_fini(net); @@ -1301,21 +1300,18 @@ void nf_conntrack_cleanup(struct net *net) } } -void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls) +void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) { struct hlist_nulls_head *hash; unsigned int nr_slots, i; size_t sz; - *vmalloced = 0; - BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head)); nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head)); sz = nr_slots * sizeof(struct hlist_nulls_head); hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, get_order(sz)); if (!hash) { - *vmalloced = 1; printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n"); hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); @@ -1331,7 +1327,7 @@ EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable); int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) { - int i, bucket, vmalloced, old_vmalloced; + int i, bucket; unsigned int hashsize, old_size; struct hlist_nulls_head *hash, *old_hash; struct nf_conntrack_tuple_hash *h; @@ -1348,7 +1344,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) if (!hashsize) return -EINVAL; - hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced, 1); + hash = nf_ct_alloc_hashtable(&hashsize, 1); if (!hash) return -ENOMEM; @@ -1370,15 +1366,13 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) } } old_size = init_net.ct.htable_size; - old_vmalloced = init_net.ct.hash_vmalloc; old_hash = init_net.ct.hash; init_net.ct.htable_size = nf_conntrack_htable_size = hashsize; - init_net.ct.hash_vmalloc = vmalloced; init_net.ct.hash = hash; spin_unlock_bh(&nf_conntrack_lock); - nf_ct_free_hashtable(old_hash, old_vmalloced, old_size); + nf_ct_free_hashtable(old_hash, old_size); return 0; } 
EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); @@ -1491,8 +1485,7 @@ static int nf_conntrack_init_net(struct net *net) } net->ct.htable_size = nf_conntrack_htable_size; - net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, - &net->ct.hash_vmalloc, 1); + net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1); if (!net->ct.hash) { ret = -ENOMEM; printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); @@ -1515,8 +1508,7 @@ err_ecache: err_acct: nf_conntrack_expect_fini(net); err_expect: - nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, - net->ct.htable_size); + nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); err_hash: kmem_cache_destroy(net->ct.nf_conntrack_cachep); err_cache: diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 4a9ed23..cd1e8e0 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -639,8 +639,7 @@ int nf_conntrack_expect_init(struct net *net) } net->ct.expect_count = 0; - net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, - &net->ct.expect_vmalloc, 0); + net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0); if (net->ct.expect_hash == NULL) goto err1; @@ -662,8 +661,7 @@ err3: if (net_eq(net, &init_net)) kmem_cache_destroy(nf_ct_expect_cachep); err2: - nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc, - nf_ct_expect_hsize); + nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize); err1: return err; } @@ -675,6 +673,5 @@ void nf_conntrack_expect_fini(struct net *net) rcu_barrier(); /* Wait for call_rcu() before destroy */ kmem_cache_destroy(nf_ct_expect_cachep); } - nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc, - nf_ct_expect_hsize); + nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize); } diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 767bbe9..1bdfea3 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ 
b/net/netfilter/nf_conntrack_helper.c @@ -33,7 +33,6 @@ static DEFINE_MUTEX(nf_ct_helper_mutex); static struct hlist_head *nf_ct_helper_hash __read_mostly; static unsigned int nf_ct_helper_hsize __read_mostly; static unsigned int nf_ct_helper_count __read_mostly; -static int nf_ct_helper_vmalloc; /* Stupid hash, but collision free for the default registrations of the @@ -267,8 +266,7 @@ int nf_conntrack_helper_init(void) int err; nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ - nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, - &nf_ct_helper_vmalloc, 0); + nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0); if (!nf_ct_helper_hash) return -ENOMEM; @@ -279,14 +277,12 @@ int nf_conntrack_helper_init(void) return 0; err1: - nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc, - nf_ct_helper_hsize); + nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); return err; } void nf_conntrack_helper_fini(void) { nf_ct_extend_unregister(&helper_extend); - nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc, - nf_ct_helper_hsize); + nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); } -- cgit v1.1 From 43f393caec0362abe03c72799d3f342af3973070 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sun, 16 Jan 2011 18:10:28 +0100 Subject: netfilter: audit target to record accepted/dropped packets This patch adds a new netfilter target which creates audit records for packets traversing a certain chain. It can be used to record packets which are rejected administratively as follows: -N AUDIT_DROP -A AUDIT_DROP -j AUDIT --type DROP -A AUDIT_DROP -j DROP a rule which would typically drop or reject a packet would then invoke the new chain to record packets before dropping them. -j AUDIT_DROP The module is protocol independent and works for iptables, ip6tables and ebtables. 
The following information is logged: - netfilter hook - packet length - incoming/outgoing interface - MAC src/dst/proto for ethernet packets - src/dst/protocol address for IPv4/IPv6 - src/dst port for TCP/UDP/UDPLITE - icmp type/code Cc: Patrick McHardy Cc: Eric Paris Cc: Al Viro Signed-off-by: Thomas Graf Signed-off-by: Patrick McHardy --- net/netfilter/Kconfig | 10 +++ net/netfilter/Makefile | 1 + net/netfilter/xt_AUDIT.c | 204 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 215 insertions(+) create mode 100644 net/netfilter/xt_AUDIT.c (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 1b79353..93918f0 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -326,6 +326,16 @@ config NETFILTER_XT_CONNMARK comment "Xtables targets" +config NETFILTER_XT_TARGET_AUDIT + tristate "AUDIT target support" + depends on AUDIT + depends on NETFILTER_ADVANCED + ---help--- + This option adds a 'AUDIT' target, which can be used to create + audit records for packets dropped/accepted. + + To compileit as a module, choose M here. If unsure, say N. 
+ config NETFILTER_XT_TARGET_CHECKSUM tristate "CHECKSUM target support" depends on IP_NF_MANGLE || IP6_NF_MANGLE diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 441050f..401d574 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -45,6 +45,7 @@ obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o # targets +obj-$(CONFIG_NETFILTER_XT_TARGET_AUDIT) += xt_AUDIT.o obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c new file mode 100644 index 0000000..81802d2 --- /dev/null +++ b/net/netfilter/xt_AUDIT.c @@ -0,0 +1,204 @@ +/* + * Creates audit record for dropped/accepted packets + * + * (C) 2010-2011 Thomas Graf + * (C) 2010-2011 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+*/ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Thomas Graf "); +MODULE_DESCRIPTION("Xtables: creates audit records for dropped/accepted packets"); +MODULE_ALIAS("ipt_AUDIT"); +MODULE_ALIAS("ip6t_AUDIT"); +MODULE_ALIAS("ebt_AUDIT"); +MODULE_ALIAS("arpt_AUDIT"); + +static void audit_proto(struct audit_buffer *ab, struct sk_buff *skb, + unsigned int proto, unsigned int offset) +{ + switch (proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_UDPLITE: { + const __be16 *pptr; + __be16 _ports[2]; + + pptr = skb_header_pointer(skb, offset, sizeof(_ports), _ports); + if (pptr == NULL) { + audit_log_format(ab, " truncated=1"); + return; + } + + audit_log_format(ab, " sport=%hu dport=%hu", + ntohs(pptr[0]), ntohs(pptr[1])); + } + break; + + case IPPROTO_ICMP: + case IPPROTO_ICMPV6: { + const u8 *iptr; + u8 _ih[2]; + + iptr = skb_header_pointer(skb, offset, sizeof(_ih), &_ih); + if (iptr == NULL) { + audit_log_format(ab, " truncated=1"); + return; + } + + audit_log_format(ab, " icmptype=%hhu icmpcode=%hhu", + iptr[0], iptr[1]); + + } + break; + } +} + +static void audit_ip4(struct audit_buffer *ab, struct sk_buff *skb) +{ + struct iphdr _iph; + const struct iphdr *ih; + + ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); + if (!ih) { + audit_log_format(ab, " truncated=1"); + return; + } + + audit_log_format(ab, " saddr=%pI4 daddr=%pI4 ipid=%hu proto=%hhu", + &ih->saddr, &ih->daddr, ntohs(ih->id), ih->protocol); + + if (ntohs(ih->frag_off) & IP_OFFSET) { + audit_log_format(ab, " frag=1"); + return; + } + + audit_proto(ab, skb, ih->protocol, ih->ihl * 4); +} + +static void audit_ip6(struct audit_buffer *ab, struct sk_buff *skb) +{ + struct ipv6hdr _ip6h; + const struct ipv6hdr *ih; + u8 nexthdr; + int offset; + + ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ip6h), &_ip6h); + if (!ih) { + 
audit_log_format(ab, " truncated=1"); + return; + } + + nexthdr = ih->nexthdr; + offset = ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h), + &nexthdr); + + audit_log_format(ab, " saddr=%pI6c daddr=%pI6c proto=%hhu", + &ih->saddr, &ih->daddr, nexthdr); + + if (offset) + audit_proto(ab, skb, nexthdr, offset); +} + +static unsigned int +audit_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_audit_info *info = par->targinfo; + struct audit_buffer *ab; + + ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT); + if (ab == NULL) + goto errout; + + audit_log_format(ab, "action=%hhu hook=%u len=%u inif=%s outif=%s", + info->type, par->hooknum, skb->len, + par->in ? par->in->name : "?", + par->out ? par->out->name : "?"); + + if (skb->mark) + audit_log_format(ab, " mark=%#x", skb->mark); + + if (skb->dev && skb->dev->type == ARPHRD_ETHER) { + audit_log_format(ab, " smac=%pM dmac=%pM macproto=0x%04x", + eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, + ntohs(eth_hdr(skb)->h_proto)); + + if (par->family == NFPROTO_BRIDGE) { + switch (eth_hdr(skb)->h_proto) { + case __constant_htons(ETH_P_IP): + audit_ip4(ab, skb); + break; + + case __constant_htons(ETH_P_IPV6): + audit_ip6(ab, skb); + break; + } + } + } + + switch (par->family) { + case NFPROTO_IPV4: + audit_ip4(ab, skb); + break; + + case NFPROTO_IPV6: + audit_ip6(ab, skb); + break; + } + + audit_log_end(ab); + +errout: + return XT_CONTINUE; +} + +static int audit_tg_check(const struct xt_tgchk_param *par) +{ + const struct xt_audit_info *info = par->targinfo; + + if (info->type > XT_AUDIT_TYPE_MAX) { + pr_info("Audit type out of range (valid range: 0..%hhu)\n", + XT_AUDIT_TYPE_MAX); + return -ERANGE; + } + + return 0; +} + +static struct xt_target audit_tg_reg __read_mostly = { + .name = "AUDIT", + .family = NFPROTO_UNSPEC, + .target = audit_tg, + .targetsize = sizeof(struct xt_audit_info), + .checkentry = audit_tg_check, + .me = THIS_MODULE, +}; + +static int __init 
audit_tg_init(void) +{ + return xt_register_target(&audit_tg_reg); +} + +static void __exit audit_tg_exit(void) +{ + xt_unregister_target(&audit_tg_reg); +} + +module_init(audit_tg_init); +module_exit(audit_tg_exit); -- cgit v1.1 From fbabf31e4d482149b5e2704eb0287cf9117bdcf3 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sun, 16 Jan 2011 18:12:59 +0100 Subject: netfilter: create audit records for x_tables replaces The setsockopt() syscall to replace tables is already recorded in the audit logs. This patch stores additional information such as table name and netfilter protocol. Cc: Patrick McHardy Cc: Eric Paris Cc: Al Viro Signed-off-by: Thomas Graf Signed-off-by: Patrick McHardy --- net/netfilter/x_tables.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'net') diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index ee5de3a..fbc2b72 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -834,6 +835,21 @@ xt_replace_table(struct xt_table *table, */ local_bh_enable(); +#ifdef CONFIG_AUDIT + if (audit_enabled) { + struct audit_buffer *ab; + + ab = audit_log_start(current->audit_context, GFP_KERNEL, + AUDIT_NETFILTER_CFG); + if (ab) { + audit_log_format(ab, "table=%s family=%u entries=%u", + table->name, table->af, + private->number); + audit_log_end(ab); + } + } +#endif + return private; } EXPORT_SYMBOL_GPL(xt_replace_table); -- cgit v1.1 From f1e231a356f90a67f8547c2881a62c92084683c6 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 18 Jan 2011 06:30:13 +0100 Subject: netfilter: xtables: add missing aliases for autoloading via iptables Signed-off-by: Jan Engelhardt --- net/netfilter/xt_IDLETIMER.c | 2 ++ net/netfilter/xt_LED.c | 2 ++ net/netfilter/xt_cpu.c | 2 ++ 3 files changed, 6 insertions(+) (limited to 'net') diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index be1f22e..3bdd443 100644 --- 
a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -313,3 +313,5 @@ MODULE_AUTHOR("Timo Teras "); MODULE_AUTHOR("Luciano Coelho "); MODULE_DESCRIPTION("Xtables: idle time monitor"); MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("ipt_IDLETIMER"); +MODULE_ALIAS("ip6t_IDLETIMER"); diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index a414050..993de2b 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c @@ -31,6 +31,8 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Adam Nielsen "); MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match"); +MODULE_ALIAS("ipt_LED"); +MODULE_ALIAS("ip6t_LED"); static LIST_HEAD(xt_led_triggers); static DEFINE_MUTEX(xt_led_mutex); diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c index b39db8a..c7a2e54 100644 --- a/net/netfilter/xt_cpu.c +++ b/net/netfilter/xt_cpu.c @@ -22,6 +22,8 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Eric Dumazet "); MODULE_DESCRIPTION("Xtables: CPU match"); +MODULE_ALIAS("ipt_cpu"); +MODULE_ALIAS("ip6t_cpu"); static int cpu_mt_check(const struct xt_mtchk_param *par) { -- cgit v1.1 From 1cc34c30be0e27d4ba8c1ce04a8a4f46c927d121 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Tue, 18 Jan 2011 01:36:57 +0100 Subject: netfilter: xt_connlimit: use hotdrop jump mark Signed-off-by: Richard Weinberger Signed-off-by: Jan Engelhardt --- net/netfilter/xt_connlimit.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 5c5b6b9..452bc16 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -204,11 +204,9 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) &info->mask, par->family); spin_unlock_bh(&info->data->lock); - if (connections < 0) { + if (connections < 0) /* kmalloc failed, drop it entirely */ - par->hotdrop = true; - return false; - } + goto hotdrop; return (connections > info->limit) ^ info->inverse; -- cgit v1.1 From 
a7c2f4d7daf9bbea362763fa7353b1862a2487ad Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Tue, 18 Jan 2011 15:02:48 +0100 Subject: netfilter: nf_nat: fix conversion to non-atomic bit ops My previous patch (netfilter: nf_nat: don't use atomic bit operation) made a mistake when converting atomic_set to a normal bit 'or'. IPS_*_BIT should be replaced with IPS_*. Signed-off-by: Changli Gao Cc: Tim Gardner Cc: Eric Dumazet Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_nat_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 6972cee..3002c04 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -323,9 +323,9 @@ nf_nat_setup_info(struct nf_conn *ct, /* It's done. */ if (maniptype == IP_NAT_MANIP_DST) - ct->status |= IPS_DST_NAT_DONE_BIT; + ct->status |= IPS_DST_NAT_DONE; else - ct->status |= IPS_SRC_NAT_DONE_BIT; + ct->status |= IPS_SRC_NAT_DONE; return NF_ACCEPT; } -- cgit v1.1 From 45eec34195853e918518231dcefaca1ea4ebacfc Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Tue, 18 Jan 2011 15:08:13 +0100 Subject: netfilter: nf_conntrack: remove an atomic bit operation As this ct won't be seen by the others, we don't need to set the IPS_CONFIRMED_BIT in an atomic way. 
Signed-off-by: Changli Gao Cc: Tim Gardner Cc: Eric Dumazet Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index dc2ff2c..f47ac67 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -486,7 +486,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) ct->timeout.expires += jiffies; add_timer(&ct->timeout); atomic_inc(&ct->ct_general.use); - set_bit(IPS_CONFIRMED_BIT, &ct->status); + ct->status |= IPS_CONFIRMED; /* Since the lookup is lockless, hash insertion must be done after * starting the timer and setting the CONFIRMED bit. The RCU barriers -- cgit v1.1 From 5f2cafe73671d865af88494159f3e8c1b322e1c5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 18 Jan 2011 15:18:08 +0100 Subject: netfilter: Kconfig: NFQUEUE is useless without NETFILTER_NETLINK_QUEUE NFLOG already does the same thing for NETFILTER_NETLINK_LOG. Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- net/netfilter/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 93918f0..e2480bd 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -487,6 +487,7 @@ config NETFILTER_XT_TARGET_NFLOG config NETFILTER_XT_TARGET_NFQUEUE tristate '"NFQUEUE" target Support' depends on NETFILTER_ADVANCED + select NETFILTER_NETLINK_QUEUE help This target replaced the old obsolete QUEUE target. -- cgit v1.1 From f15850861860636c905b33a9a5be3dcbc2b0d56a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 18 Jan 2011 15:27:28 +0100 Subject: netfilter: nfnetlink_queue: return error number to caller instead of returning -1 on error, return an error number to allow the caller to handle some errors differently. ECANCELED is used to indicate that the hook is going away and should be ignored. 
A followup patch will introduce more 'ignore this hook' conditions, (depending on queue settings) and will move kfree_skb responsibility to the caller. Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- net/netfilter/core.c | 6 ++++-- net/netfilter/nf_queue.c | 44 +++++++++++++++++++++++++++++------------ net/netfilter/nfnetlink_queue.c | 22 +++++++++++++-------- 3 files changed, 49 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/netfilter/core.c b/net/netfilter/core.c index e69d537..91d66d2f 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -179,9 +179,11 @@ next_hook: if (ret == 0) ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { - if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn, - verdict >> NF_VERDICT_BITS)) + ret = nf_queue(skb, elem, pf, hook, indev, outdev, okfn, + verdict >> NF_VERDICT_BITS); + if (ret == -ECANCELED) goto next_hook; + ret = 0; } rcu_read_unlock(); return ret; diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 1876f74..ad25c7e 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -125,7 +125,7 @@ static int __nf_queue(struct sk_buff *skb, int (*okfn)(struct sk_buff *), unsigned int queuenum) { - int status; + int status = -ENOENT; struct nf_queue_entry *entry = NULL; #ifdef CONFIG_BRIDGE_NETFILTER struct net_device *physindev; @@ -146,8 +146,10 @@ static int __nf_queue(struct sk_buff *skb, goto err_unlock; entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC); - if (!entry) + if (!entry) { + status = -ENOMEM; goto err_unlock; + } *entry = (struct nf_queue_entry) { .skb = skb, @@ -163,9 +165,8 @@ static int __nf_queue(struct sk_buff *skb, if (!try_module_get(entry->elem->owner)) { rcu_read_unlock(); kfree(entry); - return 0; + return -ECANCELED; } - /* Bump dev refs so they don't vanish while packet is out */ if (indev) dev_hold(indev); @@ -192,14 +193,14 @@ static int __nf_queue(struct sk_buff *skb, goto err; } - 
return 1; + return 0; err_unlock: rcu_read_unlock(); err: kfree_skb(skb); kfree(entry); - return 1; + return status; } int nf_queue(struct sk_buff *skb, @@ -211,6 +212,8 @@ int nf_queue(struct sk_buff *skb, unsigned int queuenum) { struct sk_buff *segs; + int err; + unsigned int queued; if (!skb_is_gso(skb)) return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn, @@ -227,19 +230,32 @@ int nf_queue(struct sk_buff *skb, segs = skb_gso_segment(skb, 0); kfree_skb(skb); + /* Does not use PTR_ERR to limit the number of error codes that can be + * returned by nf_queue. For instance, callers rely on -ECANCELED to mean + * 'ignore this hook'. + */ if (IS_ERR(segs)) - return 1; + return -EINVAL; + queued = 0; + err = 0; do { struct sk_buff *nskb = segs->next; segs->next = NULL; - if (!__nf_queue(segs, elem, pf, hook, indev, outdev, okfn, - queuenum)) + if (err == 0) + err = __nf_queue(segs, elem, pf, hook, indev, + outdev, okfn, queuenum); + if (err == 0) + queued++; + else kfree_skb(segs); segs = nskb; } while (segs); - return 1; + + if (unlikely(err && queued)) + err = 0; + return err; } void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) @@ -247,6 +263,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) struct sk_buff *skb = entry->skb; struct list_head *elem = &entry->elem->list; const struct nf_afinfo *afinfo; + int err; rcu_read_lock(); @@ -280,9 +297,10 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) local_bh_enable(); break; case NF_QUEUE: - if (!__nf_queue(skb, elem, entry->pf, entry->hook, - entry->indev, entry->outdev, entry->okfn, - verdict >> NF_VERDICT_BITS)) + err = __nf_queue(skb, elem, entry->pf, entry->hook, + entry->indev, entry->outdev, entry->okfn, + verdict >> NF_VERDICT_BITS); + if (err == -ECANCELED) goto next_hook; break; case NF_STOLEN: diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 68e67d1..b83123f 100644 --- a/net/netfilter/nfnetlink_queue.c 
+++ b/net/netfilter/nfnetlink_queue.c @@ -387,25 +387,31 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) { struct sk_buff *nskb; struct nfqnl_instance *queue; - int err; + int err = -ENOBUFS; /* rcu_read_lock()ed by nf_hook_slow() */ queue = instance_lookup(queuenum); - if (!queue) + if (!queue) { + err = -ESRCH; goto err_out; + } - if (queue->copy_mode == NFQNL_COPY_NONE) + if (queue->copy_mode == NFQNL_COPY_NONE) { + err = -EINVAL; goto err_out; + } nskb = nfqnl_build_packet_message(queue, entry); - if (nskb == NULL) + if (nskb == NULL) { + err = -ENOMEM; goto err_out; - + } spin_lock_bh(&queue->lock); - if (!queue->peer_pid) + if (!queue->peer_pid) { + err = -EINVAL; goto err_out_free_nskb; - + } if (queue->queue_total >= queue->queue_maxlen) { queue->queue_dropped++; if (net_ratelimit()) @@ -432,7 +438,7 @@ err_out_free_nskb: err_out_unlock: spin_unlock_bh(&queue->lock); err_out: - return -1; + return err; } static int -- cgit v1.1 From 06cdb6349c1f3fd439398dbc04ce4c696f0a41ab Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 18 Jan 2011 15:28:38 +0100 Subject: netfilter: nfnetlink_queue: do not free skb on error Move free responsibility from nf_queue to caller. This enables more flexible error handling; we can now accept the skb instead of freeing it. 
Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- net/netfilter/core.c | 7 +++++-- net/netfilter/nf_queue.c | 17 ++++++++++------- 2 files changed, 15 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 91d66d2f..0c5b796 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -181,8 +181,11 @@ next_hook: } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { ret = nf_queue(skb, elem, pf, hook, indev, outdev, okfn, verdict >> NF_VERDICT_BITS); - if (ret == -ECANCELED) - goto next_hook; + if (ret < 0) { + if (ret == -ECANCELED) + goto next_hook; + kfree_skb(skb); + } ret = 0; } rcu_read_unlock(); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index ad25c7e..5c4b730 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -163,9 +163,8 @@ static int __nf_queue(struct sk_buff *skb, /* If it's going away, ignore hook. */ if (!try_module_get(entry->elem->owner)) { - rcu_read_unlock(); - kfree(entry); - return -ECANCELED; + status = -ECANCELED; + goto err_unlock; } /* Bump dev refs so they don't vanish while packet is out */ if (indev) @@ -198,7 +197,6 @@ static int __nf_queue(struct sk_buff *skb, err_unlock: rcu_read_unlock(); err: - kfree_skb(skb); kfree(entry); return status; } @@ -229,7 +227,6 @@ int nf_queue(struct sk_buff *skb, } segs = skb_gso_segment(skb, 0); - kfree_skb(skb); /* Does not use PTR_ERR to limit the number of error codes that can be * returned by nf_queue. For instance, callers rely on -ECANCELED to mean * 'ignore this hook'. 
@@ -253,8 +250,11 @@ int nf_queue(struct sk_buff *skb, segs = nskb; } while (segs); + /* also free orig skb if only some segments were queued */ if (unlikely(err && queued)) err = 0; + if (err == 0) + kfree_skb(skb); return err; } @@ -300,8 +300,11 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) err = __nf_queue(skb, elem, entry->pf, entry->hook, entry->indev, entry->outdev, entry->okfn, verdict >> NF_VERDICT_BITS); - if (err == -ECANCELED) - goto next_hook; + if (err < 0) { + if (err == -ECANCELED) + goto next_hook; + kfree_skb(skb); + } break; case NF_STOLEN: default: -- cgit v1.1 From f615df76ed862b7d3927ec5f55b805ca19be29d9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 18 Jan 2011 15:52:14 +0100 Subject: netfilter: reduce NF_VERDICT_MASK to 0xff NF_VERDICT_MASK is currently 0xffff. This is because the upper 16 bits are used to store errno (for NF_DROP) or the queue number (NF_QUEUE verdict). As there are up to 0xffff different queues available, there is no more room to store additional flags. At the moment there are only 6 different verdicts, i.e. we can reduce NF_VERDICT_MASK to 0xff to allow storing additional flags in the 0xff00 space. NF_VERDICT_BITS would then be reduced to 8, but because the value is exported to userspace, this might cause breakage; e.g. 'queuenr = (1 << NF_VERDICT_BITS) | NF_QUEUE' would now break. Thus, remove NF_VERDICT_BITS usage in the kernel and move the old value to the 'userspace compat' section. 
Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- net/netfilter/core.c | 4 ++-- net/netfilter/nf_queue.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 0c5b796..4d88e45 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -175,12 +175,12 @@ next_hook: ret = 1; } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) { kfree_skb(skb); - ret = -(verdict >> NF_VERDICT_BITS); + ret = NF_DROP_GETERR(verdict); if (ret == 0) ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { ret = nf_queue(skb, elem, pf, hook, indev, outdev, okfn, - verdict >> NF_VERDICT_BITS); + verdict >> NF_VERDICT_QBITS); if (ret < 0) { if (ret == -ECANCELED) goto next_hook; diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 5c4b730..ce1150d4a 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -299,7 +299,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) case NF_QUEUE: err = __nf_queue(skb, elem, entry->pf, entry->hook, entry->indev, entry->outdev, entry->okfn, - verdict >> NF_VERDICT_BITS); + verdict >> NF_VERDICT_QBITS); if (err < 0) { if (err == -ECANCELED) goto next_hook; -- cgit v1.1 From 94b27cc36123069966616670c3653cd6873babe9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 18 Jan 2011 16:08:30 +0100 Subject: netfilter: allow NFQUEUE bypass if no listener is available If an skb is to be NF_QUEUE'd, but no program has opened the queue, the packet is dropped. This adds a v2 target revision of xt_NFQUEUE that allows packets to continue through the ruleset instead. Because the actual queueing happens outside of the target context, the 'bypass' flag has to be communicated back to the netfilter core. Unfortunately the only choice to do this without adding a new function argument is to use the target function return value (i.e. the verdict). 
In the NF_QUEUE case, the upper 16bit already contain the queue number to use. The previous patch reduced NF_VERDICT_MASK to 0xff, i.e. we now have extra room for a new flag. If a hook issued a NF_QUEUE verdict, then the netfilter core will continue packet processing if the queueing hook returns -ESRCH (== "this queue does not exist") and the new NF_VERDICT_FLAG_QUEUE_BYPASS flag is set in the verdict value. Note: If the queue exists, but userspace does not consume packets fast enough, the skb will still be dropped. Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- net/netfilter/core.c | 3 +++ net/netfilter/nf_queue.c | 7 ++++++- net/netfilter/xt_NFQUEUE.c | 28 +++++++++++++++++++++++++--- 3 files changed, 34 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 4d88e45..1e00bf7 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -184,6 +184,9 @@ next_hook: if (ret < 0) { if (ret == -ECANCELED) goto next_hook; + if (ret == -ESRCH && + (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) + goto next_hook; kfree_skb(skb); } ret = 0; diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index ce1150d4a..5ab22e2 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -138,8 +138,10 @@ static int __nf_queue(struct sk_buff *skb, rcu_read_lock(); qh = rcu_dereference(queue_handler[pf]); - if (!qh) + if (!qh) { + status = -ESRCH; goto err_unlock; + } afinfo = nf_get_afinfo(pf); if (!afinfo) @@ -303,6 +305,9 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) if (err < 0) { if (err == -ECANCELED) goto next_hook; + if (err == -ESRCH && + (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) + goto next_hook; kfree_skb(skb); } break; diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 3962770..d4f4b5d 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -83,9 +83,20 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct 
xt_action_param *par) return NF_QUEUE_NR(queue); } -static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par) +static unsigned int +nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par) { - const struct xt_NFQ_info_v1 *info = par->targinfo; + const struct xt_NFQ_info_v2 *info = par->targinfo; + unsigned int ret = nfqueue_tg_v1(skb, par); + + if (info->bypass) + ret |= NF_VERDICT_FLAG_QUEUE_BYPASS; + return ret; +} + +static int nfqueue_tg_check(const struct xt_tgchk_param *par) +{ + const struct xt_NFQ_info_v2 *info = par->targinfo; u32 maxid; if (unlikely(!rnd_inited)) { @@ -102,6 +113,8 @@ static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par) info->queues_total, maxid); return -ERANGE; } + if (par->target->revision == 2 && info->bypass > 1) + return -EINVAL; return 0; } @@ -117,11 +130,20 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = { .name = "NFQUEUE", .revision = 1, .family = NFPROTO_UNSPEC, - .checkentry = nfqueue_tg_v1_check, + .checkentry = nfqueue_tg_check, .target = nfqueue_tg_v1, .targetsize = sizeof(struct xt_NFQ_info_v1), .me = THIS_MODULE, }, + { + .name = "NFQUEUE", + .revision = 2, + .family = NFPROTO_UNSPEC, + .checkentry = nfqueue_tg_check, + .target = nfqueue_tg_v2, + .targetsize = sizeof(struct xt_NFQ_info_v2), + .me = THIS_MODULE, + }, }; static int __init nfqueue_tg_init(void) -- cgit v1.1 From 94d117a1c78df38abdea0c09ef00c205b923b567 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 18 Jan 2011 16:27:56 +0100 Subject: netfilter: ipt_CLUSTERIP: remove "no conntrack!" When a packet is meant to be handled by another node of the cluster, silently drop it instead of flooding kernel log. Note : INVALID packets are also dropped without notice. 
Signed-off-by: Eric Dumazet Acked-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 1e26a48..403ca57 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -300,13 +300,8 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) * that the ->target() function isn't called after ->destroy() */ ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL) { - pr_info("no conntrack!\n"); - /* FIXME: need to drop invalid ones, since replies - * to outgoing connections of other nodes will be - * marked as INVALID */ + if (ct == NULL) return NF_DROP; - } /* special case: ICMP error handling. conntrack distinguishes between * error messages (RELATED) and information requests (see below) */ -- cgit v1.1 From 93557f53e1fbd9e2b6574ab0a9b5852628fde9e3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 18 Jan 2011 18:12:24 +0100 Subject: netfilter: nf_conntrack: nf_conntrack snmp helper Adding support for SNMP broadcast connection tracking. The SNMP broadcast requests are now paired with the SNMP responses. Thus allowing using SNMP broadcasts with firewall enabled. Please refer to the following conversation: http://marc.info/?l=netfilter-devel&m=125992205006600&w=2 Patrick McHardy wrote: > > The best solution would be to add generic broadcast tracking, the > > use of expectations for this is a bit of abuse. > > The second best choice I guess would be to move the help() function > > to a shared module and generalize it so it can be used for both. This patch implements the "second best choice". Since the netbios-ns conntrack module uses the same helper functionality as the snmp, only one helper function is added for both snmp and netbios-ns modules into the new object - nf_conntrack_broadcast. 
Signed-off-by: Jiri Olsa Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/Kconfig | 3 +- net/ipv4/netfilter/nf_nat_snmp_basic.c | 9 ++-- net/netfilter/Kconfig | 19 ++++++++ net/netfilter/Makefile | 2 + net/netfilter/nf_conntrack_broadcast.c | 82 +++++++++++++++++++++++++++++++++ net/netfilter/nf_conntrack_netbios_ns.c | 74 ++++------------------------- net/netfilter/nf_conntrack_snmp.c | 77 +++++++++++++++++++++++++++++++ 7 files changed, 196 insertions(+), 70 deletions(-) create mode 100644 net/netfilter/nf_conntrack_broadcast.c create mode 100644 net/netfilter/nf_conntrack_snmp.c (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index babd1a2..f926a31 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -206,8 +206,9 @@ config IP_NF_TARGET_REDIRECT config NF_NAT_SNMP_BASIC tristate "Basic SNMP-ALG support" - depends on NF_NAT + depends on NF_CONNTRACK_SNMP && NF_NAT depends on NETFILTER_ADVANCED + default NF_NAT && NF_CONNTRACK_SNMP ---help--- This module implements an Application Layer Gateway (ALG) for diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index ee5f419..8812a02 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -54,6 +54,7 @@ #include #include #include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("James Morris "); @@ -1310,9 +1311,9 @@ static int __init nf_nat_snmp_basic_init(void) { int ret = 0; - ret = nf_conntrack_helper_register(&snmp_helper); - if (ret < 0) - return ret; + BUG_ON(nf_nat_snmp_hook != NULL); + rcu_assign_pointer(nf_nat_snmp_hook, help); + ret = nf_conntrack_helper_register(&snmp_trap_helper); if (ret < 0) { nf_conntrack_helper_unregister(&snmp_helper); @@ -1323,7 +1324,7 @@ static int __init nf_nat_snmp_basic_init(void) static void __exit nf_nat_snmp_basic_fini(void) { - nf_conntrack_helper_unregister(&snmp_helper); + rcu_assign_pointer(nf_nat_snmp_hook, NULL); 
nf_conntrack_helper_unregister(&snmp_trap_helper); } diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index e2480bd..939b504 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -185,9 +185,13 @@ config NF_CONNTRACK_IRC To compile it as a module, choose M here. If unsure, say N. +config NF_CONNTRACK_BROADCAST + tristate + config NF_CONNTRACK_NETBIOS_NS tristate "NetBIOS name service protocol support" depends on NETFILTER_ADVANCED + select NF_CONNTRACK_BROADCAST help NetBIOS name service requests are sent as broadcast messages from an unprivileged port and responded to with unicast messages to the @@ -204,6 +208,21 @@ config NF_CONNTRACK_NETBIOS_NS To compile it as a module, choose M here. If unsure, say N. +config NF_CONNTRACK_SNMP + tristate "SNMP service protocol support" + depends on NETFILTER_ADVANCED + select NF_CONNTRACK_BROADCAST + help + SNMP service requests are sent as broadcast messages from an + unprivileged port and responded to with unicast messages to the + same port. This make them hard to firewall properly because connection + tracking doesn't deal with broadcasts. This helper tracks locally + originating SNMP service requests and the corresponding + responses. It relies on correct IP address configuration, specifically + netmask and broadcast address. + + To compile it as a module, choose M here. If unsure, say N. 
+ config NF_CONNTRACK_PPTP tristate "PPtP protocol support" depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 401d574..2c2628d 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -28,7 +28,9 @@ obj-$(CONFIG_NF_CONNTRACK_AMANDA) += nf_conntrack_amanda.o obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o obj-$(CONFIG_NF_CONNTRACK_H323) += nf_conntrack_h323.o obj-$(CONFIG_NF_CONNTRACK_IRC) += nf_conntrack_irc.o +obj-$(CONFIG_NF_CONNTRACK_BROADCAST) += nf_conntrack_broadcast.o obj-$(CONFIG_NF_CONNTRACK_NETBIOS_NS) += nf_conntrack_netbios_ns.o +obj-$(CONFIG_NF_CONNTRACK_SNMP) += nf_conntrack_snmp.o obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c new file mode 100644 index 0000000..4e99cca --- /dev/null +++ b/net/netfilter/nf_conntrack_broadcast.c @@ -0,0 +1,82 @@ +/* + * broadcast connection tracking helper + * + * (c) 2005 Patrick McHardy + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +int nf_conntrack_broadcast_help(struct sk_buff *skb, + unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int timeout) +{ + struct nf_conntrack_expect *exp; + struct iphdr *iph = ip_hdr(skb); + struct rtable *rt = skb_rtable(skb); + struct in_device *in_dev; + struct nf_conn_help *help = nfct_help(ct); + __be32 mask = 0; + + /* we're only interested in locally generated packets */ + if (skb->sk == NULL) + goto out; + if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST)) + goto out; + if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) + goto out; + + rcu_read_lock(); + in_dev = __in_dev_get_rcu(rt->dst.dev); + if (in_dev != NULL) { + for_primary_ifa(in_dev) { + if (ifa->ifa_broadcast == iph->daddr) { + mask = ifa->ifa_mask; + break; + } + } endfor_ifa(in_dev); + } + rcu_read_unlock(); + + if (mask == 0) + goto out; + + exp = nf_ct_expect_alloc(ct); + if (exp == NULL) + goto out; + + exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; + exp->tuple.src.u.udp.port = help->helper->tuple.src.u.udp.port; + + exp->mask.src.u3.ip = mask; + exp->mask.src.u.udp.port = htons(0xFFFF); + + exp->expectfn = NULL; + exp->flags = NF_CT_EXPECT_PERMANENT; + exp->class = NF_CT_EXPECT_CLASS_DEFAULT; + exp->helper = NULL; + + nf_ct_expect_related(exp); + nf_ct_expect_put(exp); + + nf_ct_refresh(ct, skb, timeout * HZ); +out: + return NF_ACCEPT; +} +EXPORT_SYMBOL_GPL(nf_conntrack_broadcast_help); + +MODULE_LICENSE("GPL"); diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index aadde01..4c8f30a 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -18,14 +18,7 @@ #include #include #include -#include -#include -#include -#include #include -#include -#include -#include #include #include @@ -40,75 +33,26 @@ MODULE_ALIAS("ip_conntrack_netbios_ns"); 
MODULE_ALIAS_NFCT_HELPER("netbios_ns"); static unsigned int timeout __read_mostly = 3; -module_param(timeout, uint, 0400); +module_param(timeout, uint, S_IRUSR); MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); -static int help(struct sk_buff *skb, unsigned int protoff, - struct nf_conn *ct, enum ip_conntrack_info ctinfo) -{ - struct nf_conntrack_expect *exp; - struct iphdr *iph = ip_hdr(skb); - struct rtable *rt = skb_rtable(skb); - struct in_device *in_dev; - __be32 mask = 0; - - /* we're only interested in locally generated packets */ - if (skb->sk == NULL) - goto out; - if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST)) - goto out; - if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) - goto out; - - rcu_read_lock(); - in_dev = __in_dev_get_rcu(rt->dst.dev); - if (in_dev != NULL) { - for_primary_ifa(in_dev) { - if (ifa->ifa_broadcast == iph->daddr) { - mask = ifa->ifa_mask; - break; - } - } endfor_ifa(in_dev); - } - rcu_read_unlock(); - - if (mask == 0) - goto out; - - exp = nf_ct_expect_alloc(ct); - if (exp == NULL) - goto out; - - exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; - exp->tuple.src.u.udp.port = htons(NMBD_PORT); - - exp->mask.src.u3.ip = mask; - exp->mask.src.u.udp.port = htons(0xFFFF); - - exp->expectfn = NULL; - exp->flags = NF_CT_EXPECT_PERMANENT; - exp->class = NF_CT_EXPECT_CLASS_DEFAULT; - exp->helper = NULL; - - nf_ct_expect_related(exp); - nf_ct_expect_put(exp); - - nf_ct_refresh(ct, skb, timeout * HZ); -out: - return NF_ACCEPT; -} - static struct nf_conntrack_expect_policy exp_policy = { .max_expected = 1, }; +static int netbios_ns_help(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, enum ip_conntrack_info ctinfo) +{ + return nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout); +} + static struct nf_conntrack_helper helper __read_mostly = { .name = "netbios-ns", - .tuple.src.l3num = AF_INET, + .tuple.src.l3num = NFPROTO_IPV4, .tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT), 
.tuple.dst.protonum = IPPROTO_UDP, .me = THIS_MODULE, - .help = help, + .help = netbios_ns_help, .expect_policy = &exp_policy, }; diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c new file mode 100644 index 0000000..6e545e2 --- /dev/null +++ b/net/netfilter/nf_conntrack_snmp.c @@ -0,0 +1,77 @@ +/* + * SNMP service broadcast connection tracking helper + * + * (c) 2011 Jiri Olsa + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include + +#include +#include +#include + +#define SNMP_PORT 161 + +MODULE_AUTHOR("Jiri Olsa "); +MODULE_DESCRIPTION("SNMP service broadcast connection tracking helper"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NFCT_HELPER("snmp"); + +static unsigned int timeout __read_mostly = 30; +module_param(timeout, uint, S_IRUSR); +MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); + +int (*nf_nat_snmp_hook)(struct sk_buff *skb, + unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo); +EXPORT_SYMBOL_GPL(nf_nat_snmp_hook); + +static int snmp_conntrack_help(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, enum ip_conntrack_info ctinfo) +{ + typeof(nf_nat_snmp_hook) nf_nat_snmp; + + nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout); + + nf_nat_snmp = rcu_dereference(nf_nat_snmp_hook); + if (nf_nat_snmp && ct->status & IPS_NAT_MASK) + return nf_nat_snmp(skb, protoff, ct, ctinfo); + + return NF_ACCEPT; +} + +static struct nf_conntrack_expect_policy exp_policy = { + .max_expected = 1, +}; + +static struct nf_conntrack_helper helper __read_mostly = { + .name = "snmp", + .tuple.src.l3num = NFPROTO_IPV4, + .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT), + .tuple.dst.protonum = IPPROTO_UDP, + .me = THIS_MODULE, + 
.help = snmp_conntrack_help, + .expect_policy = &exp_policy, +}; + +static int __init nf_conntrack_snmp_init(void) +{ + exp_policy.timeout = timeout; + return nf_conntrack_helper_register(&helper); +} + +static void __exit nf_conntrack_snmp_fini(void) +{ + nf_conntrack_helper_unregister(&helper); +} + +module_init(nf_conntrack_snmp_init); +module_exit(nf_conntrack_snmp_fini); -- cgit v1.1 From d6ae3bae3d1bf7a8bf367e29f2cac0788dcd0db5 Mon Sep 17 00:00:00 2001 From: Alban Crequy Date: Tue, 18 Jan 2011 06:39:15 +0000 Subject: af_unix: implement socket filter Linux Socket Filters can already be successfully attached and detached on unix sockets with setsockopt(sockfd, SOL_SOCKET, SO_{ATTACH,DETACH}_FILTER, ...). See: Documentation/networking/filter.txt But the filter was never used in the unix socket code so it did not work. This patch uses sk_filter() to filter buffers before delivery. This short program demonstrates the problem on SOCK_DGRAM. int main(void) { int i, j, ret; int sv[2]; struct pollfd fds[2]; char *message = "Hello world!"; char buffer[64]; struct sock_filter ins[32] = {{0,},}; struct sock_fprog filter; socketpair(AF_UNIX, SOCK_DGRAM, 0, sv); for (i = 0 ; i < 2 ; i++) { fds[i].fd = sv[i]; fds[i].events = POLLIN; fds[i].revents = 0; } for(j = 1 ; j < 13 ; j++) { /* Set a socket filter to truncate the message */ memset(ins, 0, sizeof(ins)); ins[0].code = BPF_RET|BPF_K; ins[0].k = j; filter.len = 1; filter.filter = ins; setsockopt(sv[1], SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter)); /* send a message */ send(sv[0], message, strlen(message) + 1, 0); /* The filter should let the message pass but truncated. */ poll(fds, 2, 0); /* Receive the truncated message*/ ret = recv(sv[1], buffer, 64, 0); printf("received %d bytes, expected %d\n", ret, j); } for (i = 0 ; i < 2 ; i++) close(sv[i]); return 0; } Signed-off-by: Alban Crequy Reviewed-by: Ian Molton Signed-off-by: David S. 
Miller --- net/unix/af_unix.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index dd419d2..8d9bbba 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1475,6 +1475,12 @@ restart: goto out_free; } + if (sk_filter(other, skb) < 0) { + /* Toss the packet but do not return any error to the sender */ + err = len; + goto out_free; + } + unix_state_lock(other); err = -EPERM; if (!unix_may_send(sk, other)) -- cgit v1.1 From 80f8f1027b99660897bdeaeae73002185d829906 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 18 Jan 2011 07:46:52 +0000 Subject: net: filter: dont block softirqs in sk_run_filter() Packet filter (BPF) doesn't need to disable softirqs, being fully re-entrant and lock-less. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/filter.c | 6 +++--- net/packet/af_packet.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index afc5837..232b187 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -142,14 +142,14 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) if (err) return err; - rcu_read_lock_bh(); - filter = rcu_dereference_bh(sk->sk_filter); + rcu_read_lock(); + filter = rcu_dereference(sk->sk_filter); if (filter) { unsigned int pkt_len = sk_run_filter(skb, filter->insns); err = pkt_len ? 
pskb_trim(skb, pkt_len) : -EPERM; } - rcu_read_unlock_bh(); + rcu_read_unlock(); return err; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 91cb1d7..c3fc7b7 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -523,11 +523,11 @@ static inline unsigned int run_filter(const struct sk_buff *skb, { struct sk_filter *filter; - rcu_read_lock_bh(); - filter = rcu_dereference_bh(sk->sk_filter); + rcu_read_lock(); + filter = rcu_dereference(sk->sk_filter); if (filter != NULL) res = sk_run_filter(skb, filter->insns); - rcu_read_unlock_bh(); + rcu_read_unlock(); return res; } -- cgit v1.1 From a992ca2a0498edd22a88ac8c41570f536de29c9e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 19 Jan 2011 16:00:07 +0100 Subject: netfilter: nf_conntrack_tstamp: add flow-based timestamp extension This patch adds flow-based timestamping for conntracks. This conntrack extension is disabled by default. Basically, we use two 64-bit variables: one to store the creation timestamp once the conntrack has been confirmed, and the other to store the deletion time. This extension is disabled by default; to enable it, you have to: echo 1 > /proc/sys/net/netfilter/nf_conntrack_timestamp This patch saves memory for user-space flow-based loggers such as ulogd2. In short, ulogd2 does not need to keep a hashtable with the conntracks in user-space to know when they were created and destroyed; instead we use the kernel timestamp. If we want to have a sane IPFIX implementation in user-space, these nanosecond-resolution timestamps are also useful. Other custom user-space applications can benefit from this via libnetfilter_conntrack. This patch modifies the /proc output to display the delta time in seconds since the flow start. You can also obtain the flow-start date by means of the conntrack-tools. 
Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/netfilter/Kconfig | 11 +++ net/netfilter/Makefile | 1 + net/netfilter/nf_conntrack_core.c | 26 +++++++ net/netfilter/nf_conntrack_netlink.c | 46 +++++++++++- net/netfilter/nf_conntrack_standalone.c | 41 +++++++++++ net/netfilter/nf_conntrack_timestamp.c | 120 ++++++++++++++++++++++++++++++++ 6 files changed, 244 insertions(+), 1 deletion(-) create mode 100644 net/netfilter/nf_conntrack_timestamp.c (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 939b504..faf7412 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -85,6 +85,17 @@ config NF_CONNTRACK_EVENTS If unsure, say `N'. +config NF_CONNTRACK_TIMESTAMP + bool 'Connection tracking timestamping' + depends on NETFILTER_ADVANCED + help + This option enables support for connection tracking timestamping. + This allows you to store the flow start-time and to obtain + the flow-stop time (once it has been destroyed) via Connection + tracking events. + + If unsure, say `N'. 
+ config NF_CT_PROTO_DCCP tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)' depends on EXPERIMENTAL diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 2c2628d..9ae6878 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -1,6 +1,7 @@ netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o +nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o obj-$(CONFIG_NETFILTER) = netfilter.o diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f47ac67..1909311 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -282,6 +283,11 @@ EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list); static void death_by_timeout(unsigned long ul_conntrack) { struct nf_conn *ct = (void *)ul_conntrack; + struct nf_conn_tstamp *tstamp; + + tstamp = nf_conn_tstamp_find(ct); + if (tstamp && tstamp->stop == 0) + tstamp->stop = ktime_to_ns(ktime_get_real()); if (!test_bit(IPS_DYING_BIT, &ct->status) && unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) { @@ -419,6 +425,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; struct nf_conn_help *help; + struct nf_conn_tstamp *tstamp; struct hlist_nulls_node *n; enum ip_conntrack_info ctinfo; struct net *net; @@ -488,6 +495,14 @@ __nf_conntrack_confirm(struct sk_buff *skb) atomic_inc(&ct->ct_general.use); ct->status |= IPS_CONFIRMED; + /* set conntrack timestamp, if enabled. 
*/ + tstamp = nf_conn_tstamp_find(ct); + if (tstamp) { + if (skb->tstamp.tv64 == 0) + __net_timestamp((struct sk_buff *)skb); + + tstamp->start = ktime_to_ns(skb->tstamp); + } /* Since the lookup is lockless, hash insertion must be done after * starting the timer and setting the CONFIRMED bit. The RCU barriers * guarantee that no other CPU can find the conntrack before the above @@ -746,6 +761,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, } nf_ct_acct_ext_add(ct, GFP_ATOMIC); + nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, @@ -1186,6 +1202,11 @@ struct __nf_ct_flush_report { static int kill_report(struct nf_conn *i, void *data) { struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data; + struct nf_conn_tstamp *tstamp; + + tstamp = nf_conn_tstamp_find(i); + if (tstamp && tstamp->stop == 0) + tstamp->stop = ktime_to_ns(ktime_get_real()); /* If we fail to deliver the event, death_by_timeout() will retry */ if (nf_conntrack_event_report(IPCT_DESTROY, i, @@ -1497,6 +1518,9 @@ static int nf_conntrack_init_net(struct net *net) ret = nf_conntrack_acct_init(net); if (ret < 0) goto err_acct; + ret = nf_conntrack_tstamp_init(net); + if (ret < 0) + goto err_tstamp; ret = nf_conntrack_ecache_init(net); if (ret < 0) goto err_ecache; @@ -1504,6 +1528,8 @@ static int nf_conntrack_init_net(struct net *net) return 0; err_ecache: + nf_conntrack_tstamp_fini(net); +err_tstamp: nf_conntrack_acct_fini(net); err_acct: nf_conntrack_expect_fini(net); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 9eabaa6..715d56c 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -42,6 +42,7 @@ #include #include #include +#include #ifdef CONFIG_NF_NAT_NEEDED #include #include @@ -230,6 +231,33 @@ nla_put_failure: return -1; } +static int +ctnetlink_dump_timestamp(struct sk_buff *skb, const struct 
nf_conn *ct) +{ + struct nlattr *nest_count; + const struct nf_conn_tstamp *tstamp; + + tstamp = nf_conn_tstamp_find(ct); + if (!tstamp) + return 0; + + nest_count = nla_nest_start(skb, CTA_TIMESTAMP | NLA_F_NESTED); + if (!nest_count) + goto nla_put_failure; + + NLA_PUT_BE64(skb, CTA_TIMESTAMP_START, cpu_to_be64(tstamp->start)); + if (tstamp->stop != 0) { + NLA_PUT_BE64(skb, CTA_TIMESTAMP_STOP, + cpu_to_be64(tstamp->stop)); + } + nla_nest_end(skb, nest_count); + + return 0; + +nla_put_failure: + return -1; +} + #ifdef CONFIG_NF_CONNTRACK_MARK static inline int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) @@ -404,6 +432,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, ctnetlink_dump_timeout(skb, ct) < 0 || ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || + ctnetlink_dump_timestamp(skb, ct) < 0 || ctnetlink_dump_protoinfo(skb, ct) < 0 || ctnetlink_dump_helpinfo(skb, ct) < 0 || ctnetlink_dump_mark(skb, ct) < 0 || @@ -471,6 +500,18 @@ ctnetlink_secctx_size(const struct nf_conn *ct) } static inline size_t +ctnetlink_timestamp_size(const struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP + if (!nf_ct_ext_exist(ct, NF_CT_EXT_TSTAMP)) + return 0; + return nla_total_size(0) + 2 * nla_total_size(sizeof(uint64_t)); +#else + return 0; +#endif +} + +static inline size_t ctnetlink_nlmsg_size(const struct nf_conn *ct) { return NLMSG_ALIGN(sizeof(struct nfgenmsg)) @@ -481,6 +522,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */ + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */ + ctnetlink_counters_size(ct) + + ctnetlink_timestamp_size(ct) + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */ + nla_total_size(0) /* CTA_PROTOINFO */ + nla_total_size(0) /* CTA_HELP */ @@ -571,7 +613,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) if (events & (1 << IPCT_DESTROY)) { if 
(ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || + ctnetlink_dump_timestamp(skb, ct) < 0) goto nla_put_failure; } else { if (ctnetlink_dump_timeout(skb, ct) < 0) @@ -1360,6 +1403,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, } nf_ct_acct_ext_add(ct, GFP_ATOMIC); + nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); /* we must add conntrack extensions before confirmation. */ ct->status |= IPS_CONFIRMED; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 8257bf6..69107fd 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -29,6 +29,7 @@ #include #include #include +#include #include MODULE_LICENSE("GPL"); @@ -46,6 +47,7 @@ EXPORT_SYMBOL_GPL(print_tuple); struct ct_iter_state { struct seq_net_private p; unsigned int bucket; + u_int64_t time_now; }; static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) @@ -96,6 +98,9 @@ static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos) static void *ct_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { + struct ct_iter_state *st = seq->private; + + st->time_now = ktime_to_ns(ktime_get_real()); rcu_read_lock(); return ct_get_idx(seq, *pos); } @@ -135,6 +140,39 @@ static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) } #endif +#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP +static u_int64_t ct_delta_time(u_int64_t time_now, const struct nf_conn *ct) +{ + struct nf_conn_tstamp *tstamp; + + tstamp = nf_conn_tstamp_find(ct); + if (tstamp) { + u_int64_t delta_time = time_now - tstamp->start; + return delta_time > 0 ? 
div_s64(delta_time, NSEC_PER_SEC) : 0; + } + return -1; +} + +static int ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct) +{ + struct ct_iter_state *st = s->private; + u_int64_t delta_time; + + delta_time = ct_delta_time(st->time_now, ct); + if (delta_time < 0) + return 0; + + return seq_printf(s, "delta-time=%llu ", + (unsigned long long)delta_time); +} +#else +static inline int +ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct) +{ + return 0; +} +#endif + /* return 0 on success, 1 in case of error */ static int ct_seq_show(struct seq_file *s, void *v) { @@ -203,6 +241,9 @@ static int ct_seq_show(struct seq_file *s, void *v) goto release; #endif + if (ct_show_delta_time(s, ct)) + goto release; + if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) goto release; diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c new file mode 100644 index 0000000..af7dd31 --- /dev/null +++ b/net/netfilter/nf_conntrack_timestamp.c @@ -0,0 +1,120 @@ +/* + * (C) 2010 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation (or any later at your option). 
+ */ + +#include +#include +#include +#include + +#include +#include +#include + +static int nf_ct_tstamp __read_mostly; + +module_param_named(tstamp, nf_ct_tstamp, bool, 0644); +MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping."); + +#ifdef CONFIG_SYSCTL +static struct ctl_table tstamp_sysctl_table[] = { + { + .procname = "nf_conntrack_timestamp", + .data = &init_net.ct.sysctl_tstamp, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + {} +}; +#endif /* CONFIG_SYSCTL */ + +static struct nf_ct_ext_type tstamp_extend __read_mostly = { + .len = sizeof(struct nf_conn_tstamp), + .align = __alignof__(struct nf_conn_tstamp), + .id = NF_CT_EXT_TSTAMP, +}; + +#ifdef CONFIG_SYSCTL +static int nf_conntrack_tstamp_init_sysctl(struct net *net) +{ + struct ctl_table *table; + + table = kmemdup(tstamp_sysctl_table, sizeof(tstamp_sysctl_table), + GFP_KERNEL); + if (!table) + goto out; + + table[0].data = &net->ct.sysctl_tstamp; + + net->ct.tstamp_sysctl_header = register_net_sysctl_table(net, + nf_net_netfilter_sysctl_path, table); + if (!net->ct.tstamp_sysctl_header) { + printk(KERN_ERR "nf_ct_tstamp: can't register to sysctl.\n"); + goto out_register; + } + return 0; + +out_register: + kfree(table); +out: + return -ENOMEM; +} + +static void nf_conntrack_tstamp_fini_sysctl(struct net *net) +{ + struct ctl_table *table; + + table = net->ct.tstamp_sysctl_header->ctl_table_arg; + unregister_net_sysctl_table(net->ct.tstamp_sysctl_header); + kfree(table); +} +#else +static int nf_conntrack_tstamp_init_sysctl(struct net *net) +{ + return 0; +} + +static void nf_conntrack_tstamp_fini_sysctl(struct net *net) +{ +} +#endif + +int nf_conntrack_tstamp_init(struct net *net) +{ + int ret; + + net->ct.sysctl_tstamp = nf_ct_tstamp; + + if (net_eq(net, &init_net)) { + ret = nf_ct_extend_register(&tstamp_extend); + if (ret < 0) { + printk(KERN_ERR "nf_ct_tstamp: Unable to register " + "extension\n"); + goto out_extend_register; + } + } 
+ + ret = nf_conntrack_tstamp_init_sysctl(net); + if (ret < 0) + goto out_sysctl; + + return 0; + +out_sysctl: + if (net_eq(net, &init_net)) + nf_ct_extend_unregister(&tstamp_extend); +out_extend_register: + return ret; +} + +void nf_conntrack_tstamp_fini(struct net *net) +{ + nf_conntrack_tstamp_fini_sysctl(net); + if (net_eq(net, &init_net)) + nf_ct_extend_unregister(&tstamp_extend); +} -- cgit v1.1 From 9d52501b421450ddd9e000c9788ac3be0e44ef1c Mon Sep 17 00:00:00 2001 From: Joel A Fernandes Date: Mon, 10 Jan 2011 00:44:23 -0600 Subject: mac80211: Rewrote code for checking if destinations are proxied. Rewrote code for checking if the destination is proxied by a mesh portal, to facilitate better understanding of the functionality. Signed-off-by: Joel A Fernandes Acked-by: Javier Cardona Signed-off-by: John W. Linville --- net/mac80211/tx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 5950e3a..dc261bb 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1815,19 +1815,19 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, mppath = mpp_path_lookup(skb->data, sdata); /* - * Do not use address extension, if it is a packet from - * the same interface and the destination is not being - * proxied by any other mest point. + * Use address extension if it is a packet from + * another interface or if we know the destination + * is being proxied by a portal (i.e. 
portal address + * differs from proxied address) */ if (compare_ether_addr(sdata->vif.addr, skb->data + ETH_ALEN) == 0 && - (!mppath || !compare_ether_addr(mppath->mpp, skb->data))) { + !(mppath && compare_ether_addr(mppath->mpp, skb->data))) { hdrlen = ieee80211_fill_mesh_addresses(&hdr, &fc, skb->data, skb->data + ETH_ALEN); meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, sdata, NULL, NULL); } else { - /* packet from other interface */ int is_mesh_mcast = 1; const u8 *mesh_da; -- cgit v1.1 From dcac908babcd8ce21057e476c8df609b28ad2cd8 Mon Sep 17 00:00:00 2001 From: Nick Ledovskikh Date: Tue, 11 Jan 2011 14:35:12 +0000 Subject: mac80211:mesh_mpp_table_grow call should depend on MESH_WORK_GROW_MPP_TABLE flag. Replace MESH_WORK_GROW_MPATH_TABLE by MESH_WORK_GROW_MPP_TABLE in mesh_mpp_table_grow call condition. (Clearly the original was a typo... -- JWL) Signed-off-by: Nickolay Ledovskikh Signed-off-by: John W. Linville --- net/mac80211/mesh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index ca3af46..2563fd1 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -645,7 +645,7 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) if (test_and_clear_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags)) mesh_mpath_table_grow(); - if (test_and_clear_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags)) + if (test_and_clear_bit(MESH_WORK_GROW_MPP_TABLE, &ifmsh->wrkq_flags)) mesh_mpp_table_grow(); if (test_and_clear_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags)) -- cgit v1.1 From df6ba5d80d6c9b51471d5fa046c3c06988e5f62a Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Wed, 12 Jan 2011 15:26:30 +0200 Subject: mac80211: add hw configuration for max ampdu buffer size Some devices don't support the maximum AMPDU buffer size of 64, so we need to add an option to configure this in the hardware configuration.
This value will be used in the ADDBA response instead of the value suggested in the request, if the latter is greater than the max supported. Signed-off-by: Luciano Coelho Tested-by: Juuso Oikarinen Signed-off-by: John W. Linville --- net/mac80211/agg-rx.c | 3 +++ net/mac80211/main.c | 1 + 2 files changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index f138b19..002db5e 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -239,6 +239,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, buf_size = buf_size << sband->ht_cap.ampdu_factor; } + /* make sure the size doesn't exceed the maximum supported by the hw */ + if (buf_size > local->hw.max_rx_aggregation_subframes) + buf_size = local->hw.max_rx_aggregation_subframes; /* examine state machine */ mutex_lock(&sta->ampdu_mlme.mtx); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 485d36b..1c507c6 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -552,6 +552,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->hw.queues = 1; local->hw.max_rates = 1; local->hw.max_report_rates = 0; + local->hw.max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF; local->hw.conf.long_frame_max_tx_count = wiphy->retry_long; local->hw.conf.short_frame_max_tx_count = wiphy->retry_short; local->user_power_level = -1; -- cgit v1.1 From 0a65169b1f602b955176cb5f0789139d0fccb041 Mon Sep 17 00:00:00 2001 From: Wey-Yi Guy Date: Fri, 14 Jan 2011 08:07:56 -0800 Subject: mac80211: mesh only parameter mppath maybe unused mppath is mesh related parameter and maybe unused Signed-off-by: Wey-Yi Guy Signed-off-by: John W. 
Linville --- net/mac80211/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index dc261bb..2378305 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1750,7 +1750,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, __le16 fc; struct ieee80211_hdr hdr; struct ieee80211s_hdr mesh_hdr __maybe_unused; - struct mesh_path *mppath = NULL; + struct mesh_path __maybe_unused *mppath = NULL; const u8 *encaps_data; int encaps_len, skip_header_bytes; int nh_pos, h_pos; -- cgit v1.1 From bfc31df33b162540c6c3e1473e022cd0a312a522 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Fri, 14 Jan 2011 09:32:18 -0800 Subject: mac80211: Show max retry-counts in kernel messages. Signed-off-by: Ben Greear Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 45fbb9e..eecbb1f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1972,9 +1972,9 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) #ifdef CONFIG_MAC80211_VERBOSE_DEBUG wiphy_debug(local->hw.wiphy, "%s: No ack for nullfunc frame to" - " AP %pM, try %d\n", + " AP %pM, try %d/%i\n", sdata->name, bssid, - ifmgd->probe_send_count); + ifmgd->probe_send_count, max_tries); #endif ieee80211_mgd_probe_ap_send(sdata); } else { @@ -2001,10 +2001,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) #ifdef CONFIG_MAC80211_VERBOSE_DEBUG wiphy_debug(local->hw.wiphy, "%s: No probe response from AP %pM" - " after %dms, try %d\n", + " after %dms, try %d/%i\n", sdata->name, bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ, - ifmgd->probe_send_count); + ifmgd->probe_send_count, max_tries); #endif ieee80211_mgd_probe_ap_send(sdata); } else { -- cgit v1.1 From ac1bd8464f161ed1475ef73c431b926256c6b5bb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 18 Jan 2011 13:45:32 +0100 Subject: 
mac80211: don't return beacons when mesh is disabled When mesh is disabled, mac80211 was returning beacons with an empty mesh ID. That isn't desirable, even if drivers shouldn't be trying to get beacons to start with. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/tx.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 2378305..e46c801 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2299,6 +2299,11 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, struct ieee80211_mgmt *mgmt; u8 *pos; +#ifdef CONFIG_MAC80211_MESH + if (!sdata->u.mesh.mesh_id_len) + goto out; +#endif + /* headroom, head length, tail length and maximum TIM length */ skb = dev_alloc_skb(local->tx_headroom + 400 + sdata->u.mesh.vendor_ie_len); -- cgit v1.1 From 0b01f030d38e00650e2db42da083d8647aad40a5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 18 Jan 2011 13:51:05 +0100 Subject: mac80211: track receiver's aggregation reorder buffer size The aggregation code currently doesn't implement the buffer size negotiation. It will always request a max buffer size (which is fine, if a little pointless, as the mac80211 code doesn't know and might just use 0 instead), but if the peer requests a smaller size it isn't possible to honour this request. In order to fix this, look at the buffer size in the addBA response frame, keep track of it and pass it to the driver in the ampdu_action callback when called with the IEEE80211_AMPDU_TX_OPERATIONAL action. That way the driver can limit the number of subframes in aggregates appropriately. Note that this doesn't fix any drivers apart from the addition of the new argument -- they all need to be updated separately to use this variable! Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/agg-rx.c | 4 ++-- net/mac80211/agg-tx.c | 20 +++++++++++++++++--- net/mac80211/driver-ops.h | 6 +++--- net/mac80211/driver-trace.h | 11 +++++++---- net/mac80211/sta_info.h | 2 ++ 5 files changed, 31 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 002db5e..1f51f41 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -76,7 +76,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, #endif /* CONFIG_MAC80211_HT_DEBUG */ if (drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_STOP, - &sta->sta, tid, NULL)) + &sta->sta, tid, NULL, 0)) printk(KERN_DEBUG "HW problem - can not stop rx " "aggregation for tid %d\n", tid); @@ -297,7 +297,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, } ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_START, - &sta->sta, tid, &start_seq_num); + &sta->sta, tid, &start_seq_num, 0); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Rx A-MPDU request on tid %d result %d\n", tid, ret); #endif /* CONFIG_MAC80211_HT_DEBUG */ diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 9cc472c..42f7c90 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -190,7 +190,7 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_TX_STOP, - &sta->sta, tid, NULL); + &sta->sta, tid, NULL, 0); /* HW shall not deny going back to legacy */ if (WARN_ON(ret)) { @@ -311,7 +311,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) start_seq_num = sta->tid_seq[tid] >> 4; ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START, - &sta->sta, tid, &start_seq_num); + &sta->sta, tid, &start_seq_num, 0); if (ret) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "BA request denied - HW unavailable for" @@ -487,7 +487,8 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, 
drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_TX_OPERATIONAL, - &sta->sta, tid, NULL); + &sta->sta, tid, NULL, + sta->ampdu_mlme.tid_tx[tid]->buf_size); /* * synchronize with TX path, while splicing the TX path @@ -742,9 +743,11 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, { struct tid_ampdu_tx *tid_tx; u16 capab, tid; + u8 buf_size; capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; + buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; mutex_lock(&sta->ampdu_mlme.mtx); @@ -767,12 +770,23 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) == WLAN_STATUS_SUCCESS) { + /* + * IEEE 802.11-2007 7.3.1.14: + * In an ADDBA Response frame, when the Status Code field + * is set to 0, the Buffer Size subfield is set to a value + * of at least 1. + */ + if (!buf_size) + goto out; + if (test_and_set_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) { /* ignore duplicate response */ goto out; } + tid_tx->buf_size = buf_size; + if (test_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state)) ieee80211_agg_tx_operational(local, sta, tid); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 98d5899..78af32d 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -382,17 +382,17 @@ static inline int drv_ampdu_action(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, - u16 *ssn) + u16 *ssn, u8 buf_size) { int ret = -EOPNOTSUPP; might_sleep(); - trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn); + trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, buf_size); if (local->ops->ampdu_action) ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action, - sta, tid, ssn); + sta, tid, ssn, buf_size); trace_drv_return_int(local, ret); diff --git a/net/mac80211/driver-trace.h 
b/net/mac80211/driver-trace.h index 49c8421..fbabbc2 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -784,9 +784,9 @@ TRACE_EVENT(drv_ampdu_action, struct ieee80211_sub_if_data *sdata, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, - u16 *ssn), + u16 *ssn, u8 buf_size), - TP_ARGS(local, sdata, action, sta, tid, ssn), + TP_ARGS(local, sdata, action, sta, tid, ssn, buf_size), TP_STRUCT__entry( LOCAL_ENTRY @@ -794,6 +794,7 @@ TRACE_EVENT(drv_ampdu_action, __field(u32, action) __field(u16, tid) __field(u16, ssn) + __field(u8, buf_size) VIF_ENTRY ), @@ -804,11 +805,13 @@ TRACE_EVENT(drv_ampdu_action, __entry->action = action; __entry->tid = tid; __entry->ssn = ssn ? *ssn : 0; + __entry->buf_size = buf_size; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d", - LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, __entry->tid + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d buf:%d", + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, + __entry->tid, __entry->buf_size ) ); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index bbdd2a8..ca0b690 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -82,6 +82,7 @@ enum ieee80211_sta_info_flags { * @state: session state (see above) * @stop_initiator: initiator of a session stop * @tx_stop: TX DelBA frame when stopping + * @buf_size: reorder buffer size at receiver * * This structure's lifetime is managed by RCU, assignments to * the array holding it must hold the aggregation mutex. @@ -101,6 +102,7 @@ struct tid_ampdu_tx { u8 dialog_token; u8 stop_initiator; bool tx_stop; + u8 buf_size; }; /** -- cgit v1.1 From 5dd36bc933e8be84f8369ac64505a2938f9ce036 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 18 Jan 2011 13:52:23 +0100 Subject: mac80211: allow advertising correct maximum aggregate size Currently, mac80211 always advertises that it may send up to 64 subframes in an aggregate. 
This is fine, since it's the max, but might as well be set to zero instead since it doesn't have any information. However, drivers might have that information, so allow them to set a variable giving it, which will then be used. The default of zero will be fine since to the peer that means we don't know and it will just use its own limit for the buffer size. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 42f7c90..63d852c 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -342,7 +342,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) /* send AddBA request */ ieee80211_send_addba_request(sdata, sta->sta.addr, tid, tid_tx->dialog_token, start_seq_num, - 0x40, tid_tx->timeout); + local->hw.max_tx_aggregation_subframes, + tid_tx->timeout); } int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, -- cgit v1.1 From fbb327c5945448e98480d610815143a6d4a63638 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 18 Jan 2011 15:48:48 +0100 Subject: mac80211: drop non-auth 3-addr data frames when running as a 4-addr station When running as a 4-addr station against an AP that has the 4-addr VLAN interface and the main 3-addr AP interface bridged together, sometimes frames originating from the station were looping back from the 3-addr AP interface, causing the bridge code to emit warnings about receiving frames with its own source address. I'm not sure why this is happening yet, but I think it's a good idea to drop all frames (except 802.1x/EAP frames) that do not match the configured addressing mode, including 4-address frames sent to a 3-address station. User test reports indicate that the problem goes away with this patch. Signed-off-by: Felix Fietkau Signed-off-by: John W. 
Linville --- net/mac80211/rx.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index a6701ed..1236710 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1556,17 +1556,36 @@ __ieee80211_data_to_8023(struct ieee80211_rx_data *rx) { struct ieee80211_sub_if_data *sdata = rx->sdata; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; + bool check_port_control = false; + struct ethhdr *ehdr; + int ret; if (ieee80211_has_a4(hdr->frame_control) && sdata->vif.type == NL80211_IFTYPE_AP_VLAN && !sdata->u.vlan.sta) return -1; + if (sdata->vif.type == NL80211_IFTYPE_STATION && + !!sdata->u.mgd.use_4addr != !!ieee80211_has_a4(hdr->frame_control)) { + + if (!sdata->u.mgd.use_4addr) + return -1; + else + check_port_control = true; + } + if (is_multicast_ether_addr(hdr->addr1) && - ((sdata->vif.type == NL80211_IFTYPE_AP_VLAN && sdata->u.vlan.sta) || - (sdata->vif.type == NL80211_IFTYPE_STATION && sdata->u.mgd.use_4addr))) + sdata->vif.type == NL80211_IFTYPE_AP_VLAN && sdata->u.vlan.sta) return -1; - return ieee80211_data_to_8023(rx->skb, sdata->vif.addr, sdata->vif.type); + ret = ieee80211_data_to_8023(rx->skb, sdata->vif.addr, sdata->vif.type); + if (ret < 0 || !check_port_control) + return ret; + + ehdr = (struct ethhdr *) rx->skb->data; + if (ehdr->h_proto != rx->sdata->control_port_protocol) + return -1; + + return 0; } /* -- cgit v1.1 From cc4fc022571376412986e27e08b0765e9cb2aafb Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 18 Jan 2011 17:32:40 +0100 Subject: netfilter: xtables: connlimit revision 1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds destination address-based selection. The old "inverse" member is overloaded (memory-wise) with a new "flags" variable, similar to how J.Park did it with xt_string rev 1. 
Since revision 0 userspace only sets flag 0x1, no great changes are made to explicitly test for different revisions. Signed-off-by: Jan Engelhardt --- net/netfilter/xt_connlimit.c | 44 ++++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 452bc16..7fd3fd5 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -193,10 +193,12 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) if (par->family == NFPROTO_IPV6) { const struct ipv6hdr *iph = ipv6_hdr(skb); - memcpy(&addr.ip6, &iph->saddr, sizeof(iph->saddr)); + memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ? + &iph->daddr : &iph->saddr, sizeof(addr.ip6)); } else { const struct iphdr *iph = ip_hdr(skb); - addr.ip = iph->saddr; + addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ? + iph->daddr : iph->saddr; } spin_lock_bh(&info->data->lock); @@ -208,7 +210,8 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) /* kmalloc failed, drop it entirely */ goto hotdrop; - return (connections > info->limit) ^ info->inverse; + return (connections > info->limit) ^ + !!(info->flags & XT_CONNLIMIT_INVERT); hotdrop: par->hotdrop = true; @@ -266,25 +269,38 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) kfree(info->data); } -static struct xt_match connlimit_mt_reg __read_mostly = { - .name = "connlimit", - .revision = 0, - .family = NFPROTO_UNSPEC, - .checkentry = connlimit_mt_check, - .match = connlimit_mt, - .matchsize = sizeof(struct xt_connlimit_info), - .destroy = connlimit_mt_destroy, - .me = THIS_MODULE, +static struct xt_match connlimit_mt_reg[] __read_mostly = { + { + .name = "connlimit", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = connlimit_mt_check, + .match = connlimit_mt, + .matchsize = sizeof(struct xt_connlimit_info), + .destroy = connlimit_mt_destroy, + .me = THIS_MODULE, + }, + 
{ + .name = "connlimit", + .revision = 1, + .family = NFPROTO_UNSPEC, + .checkentry = connlimit_mt_check, + .match = connlimit_mt, + .matchsize = sizeof(struct xt_connlimit_info), + .destroy = connlimit_mt_destroy, + .me = THIS_MODULE, + }, }; static int __init connlimit_mt_init(void) { - return xt_register_match(&connlimit_mt_reg); + return xt_register_matches(connlimit_mt_reg, + ARRAY_SIZE(connlimit_mt_reg)); } static void __exit connlimit_mt_exit(void) { - xt_unregister_match(&connlimit_mt_reg); + xt_unregister_matches(connlimit_mt_reg, ARRAY_SIZE(connlimit_mt_reg)); } module_init(connlimit_mt_init); -- cgit v1.1 From f5c88f56b35599ab9ff2d3398e0153e4cd4a4c82 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 19 Jan 2011 19:10:49 +0100 Subject: netfilter: nf_conntrack: fix lifetime display for disabled connections When no tstamp extension exists, ct_delta_time() returns -1, which is then assigned to an u64 and tested for negative values to decide whether to display the lifetime. This obviously doesn't work, use a s64 and merge the two minor functions into one. Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_standalone.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 69107fd..0ae1428 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -141,29 +141,24 @@ static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) #endif #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP -static u_int64_t ct_delta_time(u_int64_t time_now, const struct nf_conn *ct) +static int ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct) { + struct ct_iter_state *st = s->private; struct nf_conn_tstamp *tstamp; + s64 delta_time; tstamp = nf_conn_tstamp_find(ct); if (tstamp) { - u_int64_t delta_time = time_now - tstamp->start; - return delta_time > 0 ? 
div_s64(delta_time, NSEC_PER_SEC) : 0; + delta_time = st->time_now - tstamp->start; + if (delta_time > 0) + delta_time = div_s64(delta_time, NSEC_PER_SEC); + else + delta_time = 0; + + return seq_printf(s, "delta-time=%llu ", + (unsigned long long)delta_time); } - return -1; -} - -static int ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct) -{ - struct ct_iter_state *st = s->private; - u_int64_t delta_time; - - delta_time = ct_delta_time(st->time_now, ct); - if (delta_time < 0) - return 0; - - return seq_printf(s, "delta-time=%llu ", - (unsigned long long)delta_time); + return 0; } #else static inline int -- cgit v1.1 From 441c793a56502638d45d5da2195056d686147370 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Thu, 13 Jan 2011 22:19:52 +0000 Subject: net: cleanup unused macros in net directory Clean up some unused macros in net/*. 1. Macros left behind by earlier code changes, e.g. PGV_FROM_VMALLOC, KMEM_SAFETYZONE. 2. Macros never used since they were introduced to the kernel, e.g. P9_RDMA_MAX_SGE, UTIL_CTRL_PKT_SIZE. Signed-off-by: Shan Wei Acked-by: Sjur Braendeland Signed-off-by: David S.
Miller --- net/9p/trans_rdma.c | 1 - net/caif/cfcnfg.c | 2 -- net/caif/cfdgml.c | 1 - net/caif/cfserl.c | 1 - net/caif/cfutill.c | 2 +- net/caif/cfveil.c | 2 +- net/decnet/dn_table.c | 1 - net/packet/af_packet.c | 1 - net/rds/rds.h | 1 - net/wanrouter/wanmain.c | 2 -- 10 files changed, 2 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 17c5ba7..29a54cc 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -59,7 +59,6 @@ * safely advertise a maxsize * of 64k */ -#define P9_RDMA_MAX_SGE (P9_RDMA_MAXSIZE >> PAGE_SHIFT) /** * struct p9_trans_rdma - RDMA transport instance * diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index c665de7..f1f98d9 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -23,10 +23,8 @@ #include #define MAX_PHY_LAYERS 7 -#define PHY_NAME_LEN 20 #define container_obj(layr) container_of(layr, struct cfcnfg, layer) -#define RFM_FRAGMENT_SIZE 4030 /* Information about CAIF physical interfaces held by Config Module in order * to manage physical interfaces diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c index d3ed264..27dab26 100644 --- a/net/caif/cfdgml.c +++ b/net/caif/cfdgml.c @@ -18,7 +18,6 @@ #define DGM_CMD_BIT 0x80 #define DGM_FLOW_OFF 0x81 #define DGM_FLOW_ON 0x80 -#define DGM_CTRL_PKT_SIZE 1 #define DGM_MTU 1500 static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt); diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c index 9297f7d..8303fe3 100644 --- a/net/caif/cfserl.c +++ b/net/caif/cfserl.c @@ -25,7 +25,6 @@ struct cfserl { spinlock_t sync; bool usestx; }; -#define STXLEN(layr) (layr->usestx ? 
1 : 0) static int cfserl_receive(struct cflayer *layr, struct cfpkt *pkt); static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt); diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c index efad410..315c0d6 100644 --- a/net/caif/cfutill.c +++ b/net/caif/cfutill.c @@ -20,7 +20,7 @@ #define UTIL_REMOTE_SHUTDOWN 0x82 #define UTIL_FLOW_OFF 0x81 #define UTIL_FLOW_ON 0x80 -#define UTIL_CTRL_PKT_SIZE 1 + static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt); static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt); diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c index 3b425b1..c3b1dec 100644 --- a/net/caif/cfveil.c +++ b/net/caif/cfveil.c @@ -17,7 +17,7 @@ #define VEI_FLOW_OFF 0x81 #define VEI_FLOW_ON 0x80 #define VEI_SET_PIN 0x82 -#define VEI_CTRL_PKT_SIZE 1 + #define container_obj(layr) container_of(layr, struct cfsrvl, layer) static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt); diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index f2abd37..b66600b 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -59,7 +59,6 @@ struct dn_hash }; #define dz_key_0(key) ((key).datum = 0) -#define dz_prefix(key,dz) ((key).datum) #define for_nexthops(fi) { int nhsel; const struct dn_fib_nh *nh;\ for(nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c3fc7b7..c60649e 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -164,7 +164,6 @@ struct packet_mreq_max { static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing, int tx_ring); -#define PGV_FROM_VMALLOC 1 struct pgv { char *buffer; }; diff --git a/net/rds/rds.h b/net/rds/rds.h index 9542449..da8adac 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -50,7 +50,6 @@ rdsdebug(char *fmt, ...) 
#define RDS_FRAG_SIZE ((unsigned int)(1 << RDS_FRAG_SHIFT)) #define RDS_CONG_MAP_BYTES (65536 / 8) -#define RDS_CONG_MAP_LONGS (RDS_CONG_MAP_BYTES / sizeof(unsigned long)) #define RDS_CONG_MAP_PAGES (PAGE_ALIGN(RDS_CONG_MAP_BYTES) / PAGE_SIZE) #define RDS_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8) diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 74944a2..788a12c 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -59,8 +59,6 @@ #include /* copy_to/from_user */ #include /* __initfunc et al. */ -#define KMEM_SAFETYZONE 8 - #define DEV_TO_SLAVE(dev) (*((struct net_device **)netdev_priv(dev))) /* -- cgit v1.1 From cbda10fa97d72c7a1923be4426171aa90e8c6dab Mon Sep 17 00:00:00 2001 From: Vlad Dogaru Date: Thu, 13 Jan 2011 23:38:30 +0000 Subject: net_device: add support for network device groups Net devices can now be grouped, enabling simpler manipulation from userspace. This patch adds a group field to the net_device structure, as well as rtnetlink support to query and modify it. Signed-off-by: Vlad Dogaru Acked-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- net/core/dev.c | 12 ++++++++++++ net/core/rtnetlink.c | 6 ++++++ 2 files changed, 18 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 7741507..2b85d4a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4572,6 +4572,17 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) EXPORT_SYMBOL(dev_set_mtu); /** + * dev_set_group - Change group this device belongs to + * @dev: device + * @new_group: group this device should belong to + */ +void dev_set_group(struct net_device *dev, int new_group) +{ + dev->group = new_group; +} +EXPORT_SYMBOL(dev_set_group); + +/** * dev_set_mac_address - Change Media Access Control Address * @dev: device * @sa: new address @@ -5678,6 +5689,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->priv_flags = IFF_XMIT_DST_RELEASE; setup(dev); strcpy(dev->name, name); + dev->group = INIT_NETDEV_GROUP; return dev; free_pcpu: diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a5f7535..09062b0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -868,6 +868,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, netif_running(dev) ? 
dev->operstate : IF_OPER_DOWN); NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode); NLA_PUT_U32(skb, IFLA_MTU, dev->mtu); + NLA_PUT_U32(skb, IFLA_GROUP, dev->group); if (dev->ifindex != dev->iflink) NLA_PUT_U32(skb, IFLA_LINK, dev->iflink); @@ -1265,6 +1266,11 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, modified = 1; } + if (tb[IFLA_GROUP]) { + dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); + modified = 1; + } + /* * Interface selected by interface index but interface * name provided implies that a name change has been -- cgit v1.1 From e7ed828f10bd89a28f821ae7f20e691704d61923 Mon Sep 17 00:00:00 2001 From: Vlad Dogaru Date: Thu, 13 Jan 2011 23:38:31 +0000 Subject: netlink: support setting devgroup parameters If a rtnetlink request specifies a negative or zero ifindex and has no interface name attribute, but has a group attribute, then the changes are made to all the interfaces belonging to the specified group. Signed-off-by: Vlad Dogaru Acked-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 09062b0..a0b2eeb 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1558,6 +1558,24 @@ err: } EXPORT_SYMBOL(rtnl_create_link); +static int rtnl_group_changelink(struct net *net, int group, + struct ifinfomsg *ifm, + struct nlattr **tb) +{ + struct net_device *dev; + int err; + + for_each_netdev(net, dev) { + if (dev->group == group) { + err = do_setlink(dev, ifm, tb, NULL, 0); + if (err < 0) + return err; + } + } + + return 0; +} + static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); @@ -1585,10 +1603,16 @@ replay: ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) dev = __dev_get_by_index(net, ifm->ifi_index); - else if (ifname[0]) - dev = __dev_get_by_name(net, ifname); - else - dev = NULL; + else { + if (ifname[0]) + dev = __dev_get_by_name(net, ifname); + else if (tb[IFLA_GROUP]) + return rtnl_group_changelink(net, + nla_get_u32(tb[IFLA_GROUP]), + ifm, tb); + else + dev = NULL; + } err = validate_linkmsg(dev, tb); if (err < 0) -- cgit v1.1 From 4f57c087de9b46182545676d2c594120a20f2e58 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 17 Jan 2011 08:06:04 +0000 Subject: net: implement mechanism for HW based QOS This patch provides a mechanism for lower layer devices to steer traffic using skb->priority to tx queues. This allows for hardware based QOS schemes to use the default qdisc without incurring the penalties related to global state and the qdisc lock. While reliably receiving skbs on the correct tx ring to avoid head of line blocking resulting from shuffling in the LLD. Finally, all the goodness from txq caching and xps/rps can still be leveraged. 
Many drivers and hardware exist with the ability to implement QOS schemes in the hardware but currently these drivers tend to rely on firmware to reroute specific traffic, a driver specific select_queue or the queue_mapping action in the qdisc. By using select_queue for this, drivers need to be updated for each and every traffic type and we lose the goodness of much of the upstream work. Firmware solutions are inherently inflexible. And finally if admins are expected to build a qdisc and filter rules to steer traffic this requires knowledge of how the hardware is currently configured. The number of tx queues and the queue offsets may change depending on resources. Also this approach incurs all the overhead of a qdisc with filters. With the mechanism in this patch users can set skb priority using expected methods, i.e. setsockopt(), or the stack can set the priority directly. Then the skb will be steered to the correct tx queues aligned with hardware QOS traffic classes. In the normal case with single traffic class and all queues in this class everything works as is until the LLD enables multiple tcs. To steer the skb we mask out the lower 4 bits of the priority and allow the hardware to configure up to 15 distinct classes of traffic. This is expected to be sufficient for most applications; at any rate it is more than the 8021Q spec designates and is equal to the number of prio bands currently implemented in the default qdisc. This in conjunction with a userspace application such as lldpad can be used to implement 8021Q transmission selection algorithms, one of these algorithms being the extended transmission selection algorithm currently being used for DCB. Signed-off-by: John Fastabend Signed-off-by: David S. 
Miller --- net/core/dev.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 2b85d4a..8b1d886 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1593,6 +1593,48 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) rcu_read_unlock(); } +/* netif_setup_tc - Handle tc mappings on real_num_tx_queues change + * @dev: Network device + * @txq: number of queues available + * + * If real_num_tx_queues is changed the tc mappings may no longer be + * valid. To resolve this verify the tc mapping remains valid and if + * not NULL the mapping. With no priorities mapping to this + * offset/count pair it will no longer be used. In the worst case TC0 + * is invalid nothing can be done so disable priority mappings. If is + * expected that drivers will fix this mapping if they can before + * calling netif_set_real_num_tx_queues. + */ +void netif_setup_tc(struct net_device *dev, unsigned int txq) +{ + int i; + struct netdev_tc_txq *tc = &dev->tc_to_txq[0]; + + /* If TC0 is invalidated disable TC mapping */ + if (tc->offset + tc->count > txq) { + pr_warning("Number of in use tx queues changed " + "invalidating tc mappings. Priority " + "traffic classification disabled!\n"); + dev->num_tc = 0; + return; + } + + /* Invalidated prio to tc mappings set to TC0 */ + for (i = 1; i < TC_BITMASK + 1; i++) { + int q = netdev_get_prio_tc_map(dev, i); + + tc = &dev->tc_to_txq[q]; + if (tc->offset + tc->count > txq) { + pr_warning("Number of in use tx queues " + "changed. Priority %i to tc " + "mapping %i is no longer valid " + "setting map to 0\n", + i, q); + netdev_set_prio_tc_map(dev, i, 0); + } + } +} + /* * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. 
@@ -1612,6 +1654,9 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) if (rc) return rc; + if (dev->num_tc) + netif_setup_tc(dev, txq); + if (txq < dev->real_num_tx_queues) qdisc_reset_all_tx_gt(dev, txq); } @@ -2161,6 +2206,8 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, unsigned int num_tx_queues) { u32 hash; + u16 qoffset = 0; + u16 qcount = num_tx_queues; if (skb_rx_queue_recorded(skb)) { hash = skb_get_rx_queue(skb); @@ -2169,13 +2216,19 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, return hash; } + if (dev->num_tc) { + u8 tc = netdev_get_prio_tc_map(dev, skb->priority); + qoffset = dev->tc_to_txq[tc].offset; + qcount = dev->tc_to_txq[tc].count; + } + if (skb->sk && skb->sk->sk_hash) hash = skb->sk->sk_hash; else hash = (__force u16) skb->protocol ^ skb->rxhash; hash = jhash_1word(hash, hashrnd); - return (u16) (((u64) hash * num_tx_queues) >> 32); + return (u16) (((u64) hash * qcount) >> 32) + qoffset; } EXPORT_SYMBOL(__skb_tx_hash); -- cgit v1.1 From b8970f0bfc78103cb74c66055de7379b15097840 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 17 Jan 2011 08:06:09 +0000 Subject: net_sched: implement a root container qdisc sch_mqprio This implements a mqprio queueing discipline that by default creates a pfifo_fast qdisc per tx queue and provides the needed configuration interface. Using the mqprio qdisc the number of tcs currently in use along with the range of queues alloted to each class can be configured. By default skbs are mapped to traffic classes using the skb priority. This mapping is configurable. Configurable parameters, struct tc_mqprio_qopt { __u8 num_tc; __u8 prio_tc_map[TC_BITMASK + 1]; __u8 hw; __u16 count[TC_MAX_QUEUE]; __u16 offset[TC_MAX_QUEUE]; }; Here the count/offset pairing give the queue alignment and the prio_tc_map gives the mapping from skb->priority to tc. The hw bit determines if the hardware should configure the count and offset values. 
If the hardware bit is set then the operation will fail if the hardware does not implement the ndo_setup_tc operation. This is to avoid undetermined states where the hardware may or may not control the queue mapping. Also minimal bounds checking is done on the count/offset to verify a queue does not exceed num_tx_queues and that queue ranges do not overlap. Otherwise it is left to user policy or hardware configuration to create useful mappings. It is expected that hardware QOS schemes can be implemented by creating appropriate mappings of queues in ndo_setup_tc(). One expected use case is drivers will use the ndo_setup_tc to map queue ranges onto 802.1Q traffic classes. This provides a generic mechanism to map network traffic onto these traffic classes and removes the need for lower layer drivers to know specifics about traffic types. Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- net/sched/Kconfig | 12 ++ net/sched/Makefile | 1 + net/sched/sch_generic.c | 4 + net/sched/sch_mqprio.c | 417 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 434 insertions(+) create mode 100644 net/sched/sch_mqprio.c (limited to 'net') diff --git a/net/sched/Kconfig b/net/sched/Kconfig index f04d4a4..73431d4 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -205,6 +205,18 @@ config NET_SCH_DRR If unsure, say N. +config NET_SCH_MQPRIO + tristate "Multi-queue priority scheduler (MQPRIO)" + help + Say Y here if you want to use the Multi-queue Priority scheduler. + This scheduler allows QOS to be offloaded on NICs that have support + for offloading QOS schedulers. + + To compile this driver as a module, choose M here: the module will + be called sch_mqprio. + + If unsure, say N. 
+ config NET_SCH_INGRESS tristate "Ingress Qdisc" depends on NET_CLS_ACT diff --git a/net/sched/Makefile b/net/sched/Makefile index 960f5db..26ce681 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -32,6 +32,7 @@ obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o +obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o obj-$(CONFIG_NET_CLS_FW) += cls_fw.o diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 34dc598..723b278 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -540,6 +540,7 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = { .dump = pfifo_fast_dump, .owner = THIS_MODULE, }; +EXPORT_SYMBOL(pfifo_fast_ops); struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, struct Qdisc_ops *ops) @@ -674,6 +675,7 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, return oqdisc; } +EXPORT_SYMBOL(dev_graft_qdisc); static void attach_one_default_qdisc(struct net_device *dev, struct netdev_queue *dev_queue, @@ -761,6 +763,7 @@ void dev_activate(struct net_device *dev) dev_watchdog_up(dev); } } +EXPORT_SYMBOL(dev_activate); static void dev_deactivate_queue(struct net_device *dev, struct netdev_queue *dev_queue, @@ -840,6 +843,7 @@ void dev_deactivate(struct net_device *dev) list_add(&dev->unreg_list, &single); dev_deactivate_many(&single); } +EXPORT_SYMBOL(dev_deactivate); static void dev_init_scheduler_queue(struct net_device *dev, struct netdev_queue *dev_queue, diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c new file mode 100644 index 0000000..8620c65 --- /dev/null +++ b/net/sched/sch_mqprio.c @@ -0,0 +1,417 @@ +/* + * net/sched/sch_mqprio.c + * + * Copyright (c) 2010 John Fastabend + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public 
License + * version 2 as published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct mqprio_sched { + struct Qdisc **qdiscs; + int hw_owned; +}; + +static void mqprio_destroy(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + struct mqprio_sched *priv = qdisc_priv(sch); + unsigned int ntx; + + if (!priv->qdiscs) + return; + + for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++) + qdisc_destroy(priv->qdiscs[ntx]); + + if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc) + dev->netdev_ops->ndo_setup_tc(dev, 0); + else + netdev_set_num_tc(dev, 0); + + kfree(priv->qdiscs); +} + +static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt) +{ + int i, j; + + /* Verify num_tc is not out of max range */ + if (qopt->num_tc > TC_MAX_QUEUE) + return -EINVAL; + + /* Verify priority mapping uses valid tcs */ + for (i = 0; i < TC_BITMASK + 1; i++) { + if (qopt->prio_tc_map[i] >= qopt->num_tc) + return -EINVAL; + } + + /* net_device does not support requested operation */ + if (qopt->hw && !dev->netdev_ops->ndo_setup_tc) + return -EINVAL; + + /* if hw owned qcount and qoffset are taken from LLD so + * no reason to verify them here + */ + if (qopt->hw) + return 0; + + for (i = 0; i < qopt->num_tc; i++) { + unsigned int last = qopt->offset[i] + qopt->count[i]; + + /* Verify the queue count is in tx range being equal to the + * real_num_tx_queues indicates the last queue is in use. 
+ */ + if (qopt->offset[i] >= dev->real_num_tx_queues || + !qopt->count[i] || + last > dev->real_num_tx_queues) + return -EINVAL; + + /* Verify that the offset and counts do not overlap */ + for (j = i + 1; j < qopt->num_tc; j++) { + if (last > qopt->offset[j]) + return -EINVAL; + } + } + + return 0; +} + +static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) +{ + struct net_device *dev = qdisc_dev(sch); + struct mqprio_sched *priv = qdisc_priv(sch); + struct netdev_queue *dev_queue; + struct Qdisc *qdisc; + int i, err = -EOPNOTSUPP; + struct tc_mqprio_qopt *qopt = NULL; + + BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE); + BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK); + + if (sch->parent != TC_H_ROOT) + return -EOPNOTSUPP; + + if (!netif_is_multiqueue(dev)) + return -EOPNOTSUPP; + + if (nla_len(opt) < sizeof(*qopt)) + return -EINVAL; + + qopt = nla_data(opt); + if (mqprio_parse_opt(dev, qopt)) + return -EINVAL; + + /* pre-allocate qdisc, attachment can't fail */ + priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), + GFP_KERNEL); + if (priv->qdiscs == NULL) { + err = -ENOMEM; + goto err; + } + + for (i = 0; i < dev->num_tx_queues; i++) { + dev_queue = netdev_get_tx_queue(dev, i); + qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops, + TC_H_MAKE(TC_H_MAJ(sch->handle), + TC_H_MIN(i + 1))); + if (qdisc == NULL) { + err = -ENOMEM; + goto err; + } + qdisc->flags |= TCQ_F_CAN_BYPASS; + priv->qdiscs[i] = qdisc; + } + + /* If the mqprio options indicate that hardware should own + * the queue mapping then run ndo_setup_tc otherwise use the + * supplied and verified mapping + */ + if (qopt->hw) { + priv->hw_owned = 1; + err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc); + if (err) + goto err; + } else { + netdev_set_num_tc(dev, qopt->num_tc); + for (i = 0; i < qopt->num_tc; i++) + netdev_set_tc_queue(dev, i, + qopt->count[i], qopt->offset[i]); + } + + /* Always use supplied priority mappings */ + for (i = 0; i < TC_BITMASK + 1; i++) + 
netdev_set_prio_tc_map(dev, i, qopt->prio_tc_map[i]); + + sch->flags |= TCQ_F_MQROOT; + return 0; + +err: + mqprio_destroy(sch); + return err; +} + +static void mqprio_attach(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + struct mqprio_sched *priv = qdisc_priv(sch); + struct Qdisc *qdisc; + unsigned int ntx; + + /* Attach underlying qdisc */ + for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { + qdisc = priv->qdiscs[ntx]; + qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc); + if (qdisc) + qdisc_destroy(qdisc); + } + kfree(priv->qdiscs); + priv->qdiscs = NULL; +} + +static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch, + unsigned long cl) +{ + struct net_device *dev = qdisc_dev(sch); + unsigned long ntx = cl - 1 - netdev_get_num_tc(dev); + + if (ntx >= dev->num_tx_queues) + return NULL; + return netdev_get_tx_queue(dev, ntx); +} + +static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, + struct Qdisc **old) +{ + struct net_device *dev = qdisc_dev(sch); + struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); + + if (!dev_queue) + return -EINVAL; + + if (dev->flags & IFF_UP) + dev_deactivate(dev); + + *old = dev_graft_qdisc(dev_queue, new); + + if (dev->flags & IFF_UP) + dev_activate(dev); + + return 0; +} + +static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct net_device *dev = qdisc_dev(sch); + struct mqprio_sched *priv = qdisc_priv(sch); + unsigned char *b = skb_tail_pointer(skb); + struct tc_mqprio_qopt opt; + struct Qdisc *qdisc; + unsigned int i; + + sch->q.qlen = 0; + memset(&sch->bstats, 0, sizeof(sch->bstats)); + memset(&sch->qstats, 0, sizeof(sch->qstats)); + + for (i = 0; i < dev->num_tx_queues; i++) { + qdisc = netdev_get_tx_queue(dev, i)->qdisc; + spin_lock_bh(qdisc_lock(qdisc)); + sch->q.qlen += qdisc->q.qlen; + sch->bstats.bytes += qdisc->bstats.bytes; + sch->bstats.packets += qdisc->bstats.packets; + sch->qstats.qlen += qdisc->qstats.qlen; + sch->qstats.backlog += 
qdisc->qstats.backlog; + sch->qstats.drops += qdisc->qstats.drops; + sch->qstats.requeues += qdisc->qstats.requeues; + sch->qstats.overlimits += qdisc->qstats.overlimits; + spin_unlock_bh(qdisc_lock(qdisc)); + } + + opt.num_tc = netdev_get_num_tc(dev); + memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map)); + opt.hw = priv->hw_owned; + + for (i = 0; i < netdev_get_num_tc(dev); i++) { + opt.count[i] = dev->tc_to_txq[i].count; + opt.offset[i] = dev->tc_to_txq[i].offset; + } + + NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); + + return skb->len; +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl) +{ + struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); + + if (!dev_queue) + return NULL; + + return dev_queue->qdisc_sleeping; +} + +static unsigned long mqprio_get(struct Qdisc *sch, u32 classid) +{ + struct net_device *dev = qdisc_dev(sch); + unsigned int ntx = TC_H_MIN(classid); + + if (ntx > dev->num_tx_queues + netdev_get_num_tc(dev)) + return 0; + return ntx; +} + +static void mqprio_put(struct Qdisc *sch, unsigned long cl) +{ +} + +static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl, + struct sk_buff *skb, struct tcmsg *tcm) +{ + struct net_device *dev = qdisc_dev(sch); + + if (cl <= netdev_get_num_tc(dev)) { + tcm->tcm_parent = TC_H_ROOT; + tcm->tcm_info = 0; + } else { + int i; + struct netdev_queue *dev_queue; + + dev_queue = mqprio_queue_get(sch, cl); + tcm->tcm_parent = 0; + for (i = 0; i < netdev_get_num_tc(dev); i++) { + struct netdev_tc_txq tc = dev->tc_to_txq[i]; + int q_idx = cl - netdev_get_num_tc(dev); + + if (q_idx > tc.offset && + q_idx <= tc.offset + tc.count) { + tcm->tcm_parent = + TC_H_MAKE(TC_H_MAJ(sch->handle), + TC_H_MIN(i + 1)); + break; + } + } + tcm->tcm_info = dev_queue->qdisc_sleeping->handle; + } + tcm->tcm_handle |= TC_H_MIN(cl); + return 0; +} + +static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, + struct 
gnet_dump *d) +{ + struct net_device *dev = qdisc_dev(sch); + + if (cl <= netdev_get_num_tc(dev)) { + int i; + struct Qdisc *qdisc; + struct gnet_stats_queue qstats = {0}; + struct gnet_stats_basic_packed bstats = {0}; + struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1]; + + /* Drop lock here it will be reclaimed before touching + * statistics this is required because the d->lock we + * hold here is the look on dev_queue->qdisc_sleeping + * also acquired below. + */ + spin_unlock_bh(d->lock); + + for (i = tc.offset; i < tc.offset + tc.count; i++) { + qdisc = netdev_get_tx_queue(dev, i)->qdisc; + spin_lock_bh(qdisc_lock(qdisc)); + bstats.bytes += qdisc->bstats.bytes; + bstats.packets += qdisc->bstats.packets; + qstats.qlen += qdisc->qstats.qlen; + qstats.backlog += qdisc->qstats.backlog; + qstats.drops += qdisc->qstats.drops; + qstats.requeues += qdisc->qstats.requeues; + qstats.overlimits += qdisc->qstats.overlimits; + spin_unlock_bh(qdisc_lock(qdisc)); + } + /* Reclaim root sleeping lock before completing stats */ + spin_lock_bh(d->lock); + if (gnet_stats_copy_basic(d, &bstats) < 0 || + gnet_stats_copy_queue(d, &qstats) < 0) + return -1; + } else { + struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); + + sch = dev_queue->qdisc_sleeping; + sch->qstats.qlen = sch->q.qlen; + if (gnet_stats_copy_basic(d, &sch->bstats) < 0 || + gnet_stats_copy_queue(d, &sch->qstats) < 0) + return -1; + } + return 0; +} + +static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + struct net_device *dev = qdisc_dev(sch); + unsigned long ntx; + + if (arg->stop) + return; + + /* Walk hierarchy with a virtual class per tc */ + arg->count = arg->skip; + for (ntx = arg->skip; + ntx < dev->num_tx_queues + netdev_get_num_tc(dev); + ntx++) { + if (arg->fn(sch, ntx + 1, arg) < 0) { + arg->stop = 1; + break; + } + arg->count++; + } +} + +static const struct Qdisc_class_ops mqprio_class_ops = { + .graft = mqprio_graft, + .leaf = mqprio_leaf, + .get = mqprio_get, + .put = 
mqprio_put, + .walk = mqprio_walk, + .dump = mqprio_dump_class, + .dump_stats = mqprio_dump_class_stats, +}; + +struct Qdisc_ops mqprio_qdisc_ops __read_mostly = { + .cl_ops = &mqprio_class_ops, + .id = "mqprio", + .priv_size = sizeof(struct mqprio_sched), + .init = mqprio_init, + .destroy = mqprio_destroy, + .attach = mqprio_attach, + .dump = mqprio_dump, + .owner = THIS_MODULE, +}; + +static int __init mqprio_module_init(void) +{ + return register_qdisc(&mqprio_qdisc_ops); +} + +static void __exit mqprio_module_exit(void) +{ + unregister_qdisc(&mqprio_qdisc_ops); +} + +module_init(mqprio_module_init); +module_exit(mqprio_module_exit); + +MODULE_LICENSE("GPL"); -- cgit v1.1 From 7180a03118cac7256fb04f929fe34d0aeee92c40 Mon Sep 17 00:00:00 2001 From: Alban Crequy Date: Wed, 19 Jan 2011 04:56:36 +0000 Subject: af_unix: coding style: remove one level of indentation in unix_shutdown() Signed-off-by: Alban Crequy Reviewed-by: Ian Molton Signed-off-by: David S. Miller --- net/unix/af_unix.c | 60 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 8d9bbba..d8d98d5 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1984,36 +1984,38 @@ static int unix_shutdown(struct socket *sock, int mode) mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN); - if (mode) { - unix_state_lock(sk); - sk->sk_shutdown |= mode; - other = unix_peer(sk); - if (other) - sock_hold(other); - unix_state_unlock(sk); - sk->sk_state_change(sk); - - if (other && - (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { - - int peer_mode = 0; - - if (mode&RCV_SHUTDOWN) - peer_mode |= SEND_SHUTDOWN; - if (mode&SEND_SHUTDOWN) - peer_mode |= RCV_SHUTDOWN; - unix_state_lock(other); - other->sk_shutdown |= peer_mode; - unix_state_unlock(other); - other->sk_state_change(other); - if (peer_mode == SHUTDOWN_MASK) - sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); - else if 
(peer_mode & RCV_SHUTDOWN) - sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); - } - if (other) - sock_put(other); + if (!mode) + return 0; + + unix_state_lock(sk); + sk->sk_shutdown |= mode; + other = unix_peer(sk); + if (other) + sock_hold(other); + unix_state_unlock(sk); + sk->sk_state_change(sk); + + if (other && + (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { + + int peer_mode = 0; + + if (mode&RCV_SHUTDOWN) + peer_mode |= SEND_SHUTDOWN; + if (mode&SEND_SHUTDOWN) + peer_mode |= RCV_SHUTDOWN; + unix_state_lock(other); + other->sk_shutdown |= peer_mode; + unix_state_unlock(other); + other->sk_state_change(other); + if (peer_mode == SHUTDOWN_MASK) + sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); + else if (peer_mode & RCV_SHUTDOWN) + sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); } + if (other) + sock_put(other); + return 0; } -- cgit v1.1 From cc7ec456f82da7f89a5b376e613b3ac4311b3e9a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Jan 2011 19:26:56 +0000 Subject: net_sched: cleanups Cleanup net/sched code to current CodingStyle and practices. Reduce inline abuse Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/sched/act_api.c | 46 +++---- net/sched/act_csum.c | 2 +- net/sched/act_gact.c | 8 +- net/sched/act_ipt.c | 16 ++- net/sched/act_mirred.c | 4 +- net/sched/act_nat.c | 2 +- net/sched/act_pedit.c | 10 +- net/sched/act_police.c | 9 +- net/sched/act_simple.c | 10 +- net/sched/act_skbedit.c | 8 +- net/sched/cls_api.c | 33 +++-- net/sched/cls_basic.c | 17 +-- net/sched/cls_cgroup.c | 8 +- net/sched/cls_flow.c | 4 +- net/sched/cls_fw.c | 38 +++-- net/sched/cls_route.c | 126 +++++++++-------- net/sched/cls_rsvp.h | 95 ++++++------- net/sched/cls_tcindex.c | 2 +- net/sched/cls_u32.c | 77 ++++++----- net/sched/em_cmp.c | 47 +++---- net/sched/em_meta.c | 42 +++--- net/sched/em_nbyte.c | 3 +- net/sched/em_text.c | 3 +- net/sched/em_u32.c | 2 +- net/sched/ematch.c | 37 +++-- net/sched/sch_api.c | 137 +++++++++--------- net/sched/sch_atm.c | 16 +-- net/sched/sch_cbq.c | 358 ++++++++++++++++++++++++------------------------ net/sched/sch_dsmark.c | 21 ++- net/sched/sch_fifo.c | 9 +- net/sched/sch_generic.c | 29 ++-- net/sched/sch_gred.c | 85 ++++++------ net/sched/sch_hfsc.c | 35 +++-- net/sched/sch_htb.c | 104 ++++++++------ net/sched/sch_multiq.c | 8 +- net/sched/sch_netem.c | 6 +- net/sched/sch_prio.c | 34 ++--- net/sched/sch_red.c | 61 ++++----- net/sched/sch_sfq.c | 18 ++- net/sched/sch_tbf.c | 37 ++--- net/sched/sch_teql.c | 36 ++--- 41 files changed, 842 insertions(+), 801 deletions(-) (limited to 'net') diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 23b25f8..15873e1 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -78,7 +78,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, struct tc_action *a, struct tcf_hashinfo *hinfo) { struct tcf_common *p; - int err = 0, index = -1,i = 0, s_i = 0, n_i = 0; + int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; struct nlattr *nest; read_lock_bh(hinfo->lock); @@ -126,7 +126,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a, { struct tcf_common 
*p, *s_p; struct nlattr *nest; - int i= 0, n_i = 0; + int i = 0, n_i = 0; nest = nla_nest_start(skb, a->order); if (nest == NULL) @@ -138,7 +138,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a, while (p != NULL) { s_p = p->tcfc_next; if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo)) - module_put(a->ops->owner); + module_put(a->ops->owner); n_i++; p = s_p; } @@ -447,7 +447,8 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) nest = nla_nest_start(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; - if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) { + err = tcf_action_dump_old(skb, a, bind, ref); + if (err > 0) { nla_nest_end(skb, nest); return err; } @@ -491,7 +492,7 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est, struct tc_action *a; struct tc_action_ops *a_o; char act_name[IFNAMSIZ]; - struct nlattr *tb[TCA_ACT_MAX+1]; + struct nlattr *tb[TCA_ACT_MAX + 1]; struct nlattr *kind; int err; @@ -549,9 +550,9 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est, goto err_free; /* module count goes up only when brand new policy is created - if it exists and is only bound to in a_o->init() then - ACT_P_CREATED is not returned (a zero is). - */ + * if it exists and is only bound to in a_o->init() then + * ACT_P_CREATED is not returned (a zero is). 
+ */ if (err != ACT_P_CREATED) module_put(a_o->owner); a->ops = a_o; @@ -569,7 +570,7 @@ err_out: struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind) { - struct nlattr *tb[TCA_ACT_MAX_PRIO+1]; + struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *head = NULL, *act, *act_prev = NULL; int err; int i; @@ -697,7 +698,7 @@ act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n, static struct tc_action * tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid) { - struct nlattr *tb[TCA_ACT_MAX+1]; + struct nlattr *tb[TCA_ACT_MAX + 1]; struct tc_action *a; int index; int err; @@ -770,7 +771,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, struct tcamsg *t; struct netlink_callback dcb; struct nlattr *nest; - struct nlattr *tb[TCA_ACT_MAX+1]; + struct nlattr *tb[TCA_ACT_MAX + 1]; struct nlattr *kind; struct tc_action *a = create_a(0); int err = -ENOMEM; @@ -821,7 +822,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, nlh->nlmsg_flags |= NLM_F_ROOT; module_put(a->ops->owner); kfree(a); - err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); if (err > 0) return 0; @@ -842,14 +844,14 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event) { int i, ret; - struct nlattr *tb[TCA_ACT_MAX_PRIO+1]; + struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *head = NULL, *act, *act_prev = NULL; ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL); if (ret < 0) return ret; - if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) { + if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) { if (tb[1] != NULL) return tca_action_flush(net, tb[1], n, pid); else @@ -892,7 +894,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, /* now do the delete */ tcf_action_destroy(head, 0); ret = rtnetlink_send(skb, 
net, pid, RTNLGRP_TC, - n->nlmsg_flags&NLM_F_ECHO); + n->nlmsg_flags & NLM_F_ECHO); if (ret > 0) return 0; return ret; @@ -936,7 +938,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a, nlh->nlmsg_len = skb_tail_pointer(skb) - b; NETLINK_CB(skb).dst_group = RTNLGRP_TC; - err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags&NLM_F_ECHO); + err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags & NLM_F_ECHO); if (err > 0) err = 0; return err; @@ -967,7 +969,7 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, /* dump then free all the actions after update; inserted policy * stays intact - * */ + */ ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags); for (a = act; a; a = act) { act = a->next; @@ -993,8 +995,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) return -EINVAL; } - /* n->nlmsg_flags&NLM_F_CREATE - * */ + /* n->nlmsg_flags & NLM_F_CREATE */ switch (n->nlmsg_type) { case RTM_NEWACTION: /* we are going to assume all other flags @@ -1003,7 +1004,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) * but since we want avoid ambiguity (eg when flags * is zero) then just set this */ - if (n->nlmsg_flags&NLM_F_REPLACE) + if (n->nlmsg_flags & NLM_F_REPLACE) ovr = 1; replay: ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr); @@ -1028,7 +1029,7 @@ replay: static struct nlattr * find_dump_kind(const struct nlmsghdr *n) { - struct nlattr *tb1, *tb2[TCA_ACT_MAX+1]; + struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1]; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct nlattr *nla[TCAA_MAX + 1]; struct nlattr *kind; @@ -1071,9 +1072,8 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) } a_o = tc_lookup_action(kind); - if (a_o == NULL) { + if (a_o == NULL) return 0; - } memset(&a, 0, sizeof(struct tc_action)); a.ops = a_o; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 83ddfc0..6cdf9ab 100644 --- a/net/sched/act_csum.c +++ 
b/net/sched/act_csum.c @@ -63,7 +63,7 @@ static int tcf_csum_init(struct nlattr *nla, struct nlattr *est, if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_CSUM_MAX, nla,csum_policy); + err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy); if (err < 0) return err; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index c2ed90a..2b4ab4b 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -50,7 +50,7 @@ static int gact_determ(struct tcf_gact *gact) } typedef int (*g_rand)(struct tcf_gact *gact); -static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ }; +static g_rand gact_rand[MAX_RAND] = { NULL, gact_net_rand, gact_determ }; #endif /* CONFIG_GACT_PROB */ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = { @@ -89,7 +89,7 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, pc = tcf_hash_create(parm->index, est, a, sizeof(*gact), bind, &gact_idx_gen, &gact_hash_info); if (IS_ERR(pc)) - return PTR_ERR(pc); + return PTR_ERR(pc); ret = ACT_P_CREATED; } else { if (!ovr) { @@ -205,9 +205,9 @@ MODULE_LICENSE("GPL"); static int __init gact_init_module(void) { #ifdef CONFIG_GACT_PROB - printk(KERN_INFO "GACT probability on\n"); + pr_info("GACT probability on\n"); #else - printk(KERN_INFO "GACT probability NOT on\n"); + pr_info("GACT probability NOT on\n"); #endif return tcf_register_action(&act_gact_ops); } diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index c2a7c20..9fc211a 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -138,7 +138,7 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est, pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind, &ipt_idx_gen, &ipt_hash_info); if (IS_ERR(pc)) - return PTR_ERR(pc); + return PTR_ERR(pc); ret = ACT_P_CREATED; } else { if (!ovr) { @@ -162,7 +162,8 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est, if (unlikely(!t)) goto err2; - if ((err = ipt_init_target(t, tname, hook)) < 0) + err = 
ipt_init_target(t, tname, hook); + if (err < 0) goto err3; spin_lock_bh(&ipt->tcf_lock); @@ -212,8 +213,9 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, bstats_update(&ipt->tcf_bstats, skb); /* yes, we have to worry about both in and out dev - worry later - danger - this API seems to have changed - from earlier kernels */ + * worry later - danger - this API seems to have changed + * from earlier kernels + */ par.in = skb->dev; par.out = NULL; par.hooknum = ipt->tcfi_hook; @@ -253,9 +255,9 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int struct tc_cnt c; /* for simple targets kernel size == user size - ** user name = target name - ** for foolproof you need to not assume this - */ + * user name = target name + * for foolproof you need to not assume this + */ t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC); if (unlikely(!t)) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index d765067..961386e 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -41,13 +41,13 @@ static struct tcf_hashinfo mirred_hash_info = { .lock = &mirred_lock, }; -static inline int tcf_mirred_release(struct tcf_mirred *m, int bind) +static int tcf_mirred_release(struct tcf_mirred *m, int bind) { if (m) { if (bind) m->tcf_bindcnt--; m->tcf_refcnt--; - if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) { + if (!m->tcf_bindcnt && m->tcf_refcnt <= 0) { list_del(&m->tcfm_list); if (m->tcfm_dev) dev_put(m->tcfm_dev); diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 178a4bd..762b027 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -69,7 +69,7 @@ static int tcf_nat_init(struct nlattr *nla, struct nlattr *est, pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, &nat_idx_gen, &nat_hash_info); if (IS_ERR(pc)) - return PTR_ERR(pc); + return PTR_ERR(pc); p = to_tcf_nat(pc); ret = ACT_P_CREATED; } else { diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 445bef7..50c7c06 
100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -70,7 +70,7 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est, pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, &pedit_idx_gen, &pedit_hash_info); if (IS_ERR(pc)) - return PTR_ERR(pc); + return PTR_ERR(pc); p = to_pedit(pc); keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) { @@ -127,11 +127,9 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, int i, munged = 0; unsigned int off; - if (skb_cloned(skb)) { - if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { - return p->tcf_action; - } - } + if (skb_cloned(skb) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + return p->tcf_action; off = skb_network_offset(skb); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index e2f08b1..8a16307 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -22,8 +22,8 @@ #include #include -#define L2T(p,L) qdisc_l2t((p)->tcfp_R_tab, L) -#define L2T_P(p,L) qdisc_l2t((p)->tcfp_P_tab, L) +#define L2T(p, L) qdisc_l2t((p)->tcfp_R_tab, L) +#define L2T_P(p, L) qdisc_l2t((p)->tcfp_P_tab, L) #define POL_TAB_MASK 15 static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; @@ -37,8 +37,7 @@ static struct tcf_hashinfo police_hash_info = { }; /* old policer structure from before tc actions */ -struct tc_police_compat -{ +struct tc_police_compat { u32 index; int action; u32 limit; @@ -139,7 +138,7 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est, struct tc_action *a, int ovr, int bind) { - unsigned h; + unsigned int h; int ret = 0, err; struct nlattr *tb[TCA_POLICE_MAX + 1]; struct tc_police *parm; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 7287cff..a34a22d 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -47,7 +47,7 @@ static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result /* print policy string followed by 
_ then packet count * Example if this was the 3rd packet and the string was "hello" * then it would look like "hello_3" (without quotes) - **/ + */ pr_info("simple: %s_%d\n", (char *)d->tcfd_defdata, d->tcf_bstats.packets); spin_unlock(&d->tcf_lock); @@ -125,7 +125,7 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est, pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, &simp_idx_gen, &simp_hash_info); if (IS_ERR(pc)) - return PTR_ERR(pc); + return PTR_ERR(pc); d = to_defact(pc); ret = alloc_defdata(d, defdata); @@ -149,7 +149,7 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est, return ret; } -static inline int tcf_simp_cleanup(struct tc_action *a, int bind) +static int tcf_simp_cleanup(struct tc_action *a, int bind) { struct tcf_defact *d = a->priv; @@ -158,8 +158,8 @@ static inline int tcf_simp_cleanup(struct tc_action *a, int bind) return 0; } -static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, - int bind, int ref) +static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_defact *d = a->priv; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 836f5fe..5f6f0c7 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -113,7 +113,7 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est, pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, &skbedit_idx_gen, &skbedit_hash_info); if (IS_ERR(pc)) - return PTR_ERR(pc); + return PTR_ERR(pc); d = to_skbedit(pc); ret = ACT_P_CREATED; @@ -144,7 +144,7 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est, return ret; } -static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind) +static int tcf_skbedit_cleanup(struct tc_action *a, int bind) { struct tcf_skbedit *d = a->priv; @@ -153,8 +153,8 @@ static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind) return 0; } -static inline int 
tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, - int bind, int ref) +static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_skbedit *d = a->priv; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 5fd0c28..bb2c523 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -85,7 +85,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) int rc = -ENOENT; write_lock(&cls_mod_lock); - for (tp = &tcf_proto_base; (t=*tp) != NULL; tp = &t->next) + for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next) if (t == ops) break; @@ -111,7 +111,7 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp) u32 first = TC_H_MAKE(0xC0000000U, 0U); if (tp) - first = tp->prio-1; + first = tp->prio - 1; return first; } @@ -149,7 +149,8 @@ replay: if (prio == 0) { /* If no priority is given, user wants we allocated it. */ - if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE)) + if (n->nlmsg_type != RTM_NEWTFILTER || + !(n->nlmsg_flags & NLM_F_CREATE)) return -ENOENT; prio = TC_H_MAKE(0x80000000U, 0U); } @@ -176,7 +177,8 @@ replay: } /* Is it classful? 
*/ - if ((cops = q->ops->cl_ops) == NULL) + cops = q->ops->cl_ops; + if (!cops) return -EINVAL; if (cops->tcf_chain == NULL) @@ -196,10 +198,11 @@ replay: goto errout; /* Check the chain for existence of proto-tcf with this priority */ - for (back = chain; (tp=*back) != NULL; back = &tp->next) { + for (back = chain; (tp = *back) != NULL; back = &tp->next) { if (tp->prio >= prio) { if (tp->prio == prio) { - if (!nprio || (tp->protocol != protocol && protocol)) + if (!nprio || + (tp->protocol != protocol && protocol)) goto errout; } else tp = NULL; @@ -216,7 +219,8 @@ replay: goto errout; err = -ENOENT; - if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE)) + if (n->nlmsg_type != RTM_NEWTFILTER || + !(n->nlmsg_flags & NLM_F_CREATE)) goto errout; @@ -420,7 +424,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) return skb->len; - if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) + dev = __dev_get_by_index(net, tcm->tcm_ifindex); + if (!dev) return skb->len; if (!tcm->tcm_parent) @@ -429,7 +434,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); if (!q) goto out; - if ((cops = q->ops->cl_ops) == NULL) + cops = q->ops->cl_ops; + if (!cops) goto errout; if (cops->tcf_chain == NULL) goto errout; @@ -444,8 +450,9 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; - for (tp=*chain, t=0; tp; tp = tp->next, t++) { - if (t < s_t) continue; + for (tp = *chain, t = 0; tp; tp = tp->next, t++) { + if (t < s_t) + continue; if (TC_H_MAJ(tcm->tcm_info) && TC_H_MAJ(tcm->tcm_info) != tp->prio) continue; @@ -468,10 +475,10 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) arg.skb = skb; arg.cb = cb; arg.w.stop = 0; - arg.w.skip = cb->args[1]-1; + arg.w.skip = cb->args[1] - 1; arg.w.count = 0; tp->ops->walk(tp, &arg.w); 
- cb->args[1] = arg.w.count+1; + cb->args[1] = arg.w.count + 1; if (arg.w.stop) break; } diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index f23d915..8be8872 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -21,14 +21,12 @@ #include #include -struct basic_head -{ +struct basic_head { u32 hgenerator; struct list_head flist; }; -struct basic_filter -{ +struct basic_filter { u32 handle; struct tcf_exts exts; struct tcf_ematch_tree ematches; @@ -92,8 +90,7 @@ static int basic_init(struct tcf_proto *tp) return 0; } -static inline void basic_delete_filter(struct tcf_proto *tp, - struct basic_filter *f) +static void basic_delete_filter(struct tcf_proto *tp, struct basic_filter *f) { tcf_unbind_filter(tp, &f->res); tcf_exts_destroy(tp, &f->exts); @@ -135,9 +132,9 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = { [TCA_BASIC_EMATCHES] = { .type = NLA_NESTED }, }; -static inline int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f, - unsigned long base, struct nlattr **tb, - struct nlattr *est) +static int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f, + unsigned long base, struct nlattr **tb, + struct nlattr *est) { int err = -EINVAL; struct tcf_exts e; @@ -203,7 +200,7 @@ static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle, } while (--i > 0 && basic_get(tp, head->hgenerator)); if (i <= 0) { - printk(KERN_ERR "Insufficient number of handles\n"); + pr_err("Insufficient number of handles\n"); goto errout; } diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index d49c40f..32a3351 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -56,7 +56,8 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, { struct cgroup_cls_state *cs; - if (!(cs = kzalloc(sizeof(*cs), GFP_KERNEL))) + cs = kzalloc(sizeof(*cs), GFP_KERNEL); + if (!cs) return ERR_PTR(-ENOMEM); if (cgrp->parent) @@ -94,8 +95,7 @@ static int cgrp_populate(struct cgroup_subsys 
*ss, struct cgroup *cgrp) return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files)); } -struct cls_cgroup_head -{ +struct cls_cgroup_head { u32 handle; struct tcf_exts exts; struct tcf_ematch_tree ematches; @@ -166,7 +166,7 @@ static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg) { - struct nlattr *tb[TCA_CGROUP_MAX+1]; + struct nlattr *tb[TCA_CGROUP_MAX + 1]; struct cls_cgroup_head *head = tp->root; struct tcf_ematch_tree t; struct tcf_exts e; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 5b271a1..5eec16e 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -121,7 +121,7 @@ static u32 flow_get_proto_src(struct sk_buff *skb) if (!pskb_network_may_pull(skb, sizeof(*iph))) break; iph = ip_hdr(skb); - if (iph->frag_off & htons(IP_MF|IP_OFFSET)) + if (iph->frag_off & htons(IP_MF | IP_OFFSET)) break; poff = proto_ports_offset(iph->protocol); if (poff >= 0 && @@ -163,7 +163,7 @@ static u32 flow_get_proto_dst(struct sk_buff *skb) if (!pskb_network_may_pull(skb, sizeof(*iph))) break; iph = ip_hdr(skb); - if (iph->frag_off & htons(IP_MF|IP_OFFSET)) + if (iph->frag_off & htons(IP_MF | IP_OFFSET)) break; poff = proto_ports_offset(iph->protocol); if (poff >= 0 && diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 93b0a7b..26e7bc4 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -31,14 +31,12 @@ #define HTSIZE (PAGE_SIZE/sizeof(struct fw_filter *)) -struct fw_head -{ +struct fw_head { struct fw_filter *ht[HTSIZE]; u32 mask; }; -struct fw_filter -{ +struct fw_filter { struct fw_filter *next; u32 id; struct tcf_result res; @@ -53,7 +51,7 @@ static const struct tcf_ext_map fw_ext_map = { .police = TCA_FW_POLICE }; -static __inline__ int fw_hash(u32 handle) +static inline int fw_hash(u32 handle) { if (HTSIZE == 4096) return ((handle >> 24) & 0xFFF) ^ @@ -82,14 +80,14 @@ static __inline__ int fw_hash(u32 handle) static int fw_classify(struct sk_buff *skb, 
struct tcf_proto *tp, struct tcf_result *res) { - struct fw_head *head = (struct fw_head*)tp->root; + struct fw_head *head = (struct fw_head *)tp->root; struct fw_filter *f; int r; u32 id = skb->mark; if (head != NULL) { id &= head->mask; - for (f=head->ht[fw_hash(id)]; f; f=f->next) { + for (f = head->ht[fw_hash(id)]; f; f = f->next) { if (f->id == id) { *res = f->res; #ifdef CONFIG_NET_CLS_IND @@ -105,7 +103,8 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, } } else { /* old method */ - if (id && (TC_H_MAJ(id) == 0 || !(TC_H_MAJ(id^tp->q->handle)))) { + if (id && (TC_H_MAJ(id) == 0 || + !(TC_H_MAJ(id ^ tp->q->handle)))) { res->classid = id; res->class = 0; return 0; @@ -117,13 +116,13 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, static unsigned long fw_get(struct tcf_proto *tp, u32 handle) { - struct fw_head *head = (struct fw_head*)tp->root; + struct fw_head *head = (struct fw_head *)tp->root; struct fw_filter *f; if (head == NULL) return 0; - for (f=head->ht[fw_hash(handle)]; f; f=f->next) { + for (f = head->ht[fw_hash(handle)]; f; f = f->next) { if (f->id == handle) return (unsigned long)f; } @@ -139,8 +138,7 @@ static int fw_init(struct tcf_proto *tp) return 0; } -static inline void -fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f) +static void fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f) { tcf_unbind_filter(tp, &f->res); tcf_exts_destroy(tp, &f->exts); @@ -156,8 +154,8 @@ static void fw_destroy(struct tcf_proto *tp) if (head == NULL) return; - for (h=0; h<HTSIZE; h++) { - while ((f=head->ht[h]) != NULL) { + for (h = 0; h < HTSIZE; h++) { + while ((f = head->ht[h]) != NULL) { head->ht[h] = f->next; fw_delete_filter(tp, f); } @@ -167,14 +165,14 @@ static void fw_destroy(struct tcf_proto *tp) static int fw_delete(struct tcf_proto *tp, unsigned long arg) { - struct fw_head *head = (struct fw_head*)tp->root; - struct fw_filter *f = (struct fw_filter*)arg; + struct fw_head *head = (struct fw_head *)tp->root; + struct fw_filter
*f = (struct fw_filter *)arg; struct fw_filter **fp; if (head == NULL || f == NULL) goto out; - for (fp=&head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) { + for (fp = &head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) { if (*fp == f) { tcf_tree_lock(tp); *fp = f->next; @@ -240,7 +238,7 @@ static int fw_change(struct tcf_proto *tp, unsigned long base, struct nlattr **tca, unsigned long *arg) { - struct fw_head *head = (struct fw_head*)tp->root; + struct fw_head *head = (struct fw_head *)tp->root; struct fw_filter *f = (struct fw_filter *) *arg; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_FW_MAX + 1]; @@ -302,7 +300,7 @@ errout: static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) { - struct fw_head *head = (struct fw_head*)tp->root; + struct fw_head *head = (struct fw_head *)tp->root; int h; if (head == NULL) @@ -332,7 +330,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { struct fw_head *head = (struct fw_head *)tp->root; - struct fw_filter *f = (struct fw_filter*)fh; + struct fw_filter *f = (struct fw_filter *)fh; unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 694dcd8..d580cdf 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -23,34 +23,30 @@ #include /* - 1. For now we assume that route tags < 256. - It allows to use direct table lookups, instead of hash tables. - 2. For now we assume that "from TAG" and "fromdev DEV" statements - are mutually exclusive. - 3. "to TAG from ANY" has higher priority, than "to ANY from XXX" + * 1. For now we assume that route tags < 256. + * It allows to use direct table lookups, instead of hash tables. + * 2. For now we assume that "from TAG" and "fromdev DEV" statements + * are mutually exclusive. + * 3. 
"to TAG from ANY" has higher priority, than "to ANY from XXX" */ -struct route4_fastmap -{ +struct route4_fastmap { struct route4_filter *filter; u32 id; int iif; }; -struct route4_head -{ +struct route4_head { struct route4_fastmap fastmap[16]; - struct route4_bucket *table[256+1]; + struct route4_bucket *table[256 + 1]; }; -struct route4_bucket -{ +struct route4_bucket { /* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */ - struct route4_filter *ht[16+16+1]; + struct route4_filter *ht[16 + 16 + 1]; }; -struct route4_filter -{ +struct route4_filter { struct route4_filter *next; u32 id; int iif; @@ -61,20 +57,20 @@ struct route4_filter struct route4_bucket *bkt; }; -#define ROUTE4_FAILURE ((struct route4_filter*)(-1L)) +#define ROUTE4_FAILURE ((struct route4_filter *)(-1L)) static const struct tcf_ext_map route_ext_map = { .police = TCA_ROUTE4_POLICE, .action = TCA_ROUTE4_ACT }; -static __inline__ int route4_fastmap_hash(u32 id, int iif) +static inline int route4_fastmap_hash(u32 id, int iif) { - return id&0xF; + return id & 0xF; } -static inline -void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id) +static void +route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id) { spinlock_t *root_lock = qdisc_root_sleeping_lock(q); @@ -83,32 +79,33 @@ void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id) spin_unlock_bh(root_lock); } -static inline void +static void route4_set_fastmap(struct route4_head *head, u32 id, int iif, struct route4_filter *f) { int h = route4_fastmap_hash(id, iif); + head->fastmap[h].id = id; head->fastmap[h].iif = iif; head->fastmap[h].filter = f; } -static __inline__ int route4_hash_to(u32 id) +static inline int route4_hash_to(u32 id) { - return id&0xFF; + return id & 0xFF; } -static __inline__ int route4_hash_from(u32 id) +static inline int route4_hash_from(u32 id) { - return (id>>16)&0xF; + return (id >> 16) & 0xF; } -static __inline__ int route4_hash_iif(int iif) +static 
inline int route4_hash_iif(int iif) { - return 16 + ((iif>>16)&0xF); + return 16 + ((iif >> 16) & 0xF); } -static __inline__ int route4_hash_wild(void) +static inline int route4_hash_wild(void) { return 32; } @@ -131,21 +128,22 @@ static __inline__ int route4_hash_wild(void) static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res) { - struct route4_head *head = (struct route4_head*)tp->root; + struct route4_head *head = (struct route4_head *)tp->root; struct dst_entry *dst; struct route4_bucket *b; struct route4_filter *f; u32 id, h; int iif, dont_cache = 0; - if ((dst = skb_dst(skb)) == NULL) + dst = skb_dst(skb); + if (!dst) goto failure; id = dst->tclassid; if (head == NULL) goto old_method; - iif = ((struct rtable*)dst)->fl.iif; + iif = ((struct rtable *)dst)->fl.iif; h = route4_fastmap_hash(id, iif); if (id == head->fastmap[h].id && @@ -161,7 +159,8 @@ static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp, h = route4_hash_to(id); restart: - if ((b = head->table[h]) != NULL) { + b = head->table[h]; + if (b) { for (f = b->ht[route4_hash_from(id)]; f; f = f->next) if (f->id == id) ROUTE4_APPLY_RESULT(); @@ -197,8 +196,9 @@ old_method: static inline u32 to_hash(u32 id) { - u32 h = id&0xFF; - if (id&0x8000) + u32 h = id & 0xFF; + + if (id & 0x8000) h += 256; return h; } @@ -211,17 +211,17 @@ static inline u32 from_hash(u32 id) if (!(id & 0x8000)) { if (id > 255) return 256; - return id&0xF; + return id & 0xF; } - return 16 + (id&0xF); + return 16 + (id & 0xF); } static unsigned long route4_get(struct tcf_proto *tp, u32 handle) { - struct route4_head *head = (struct route4_head*)tp->root; + struct route4_head *head = (struct route4_head *)tp->root; struct route4_bucket *b; struct route4_filter *f; - unsigned h1, h2; + unsigned int h1, h2; if (!head) return 0; @@ -230,11 +230,12 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle) if (h1 > 256) return 0; - h2 = from_hash(handle>>16); + h2 = 
from_hash(handle >> 16); if (h2 > 32) return 0; - if ((b = head->table[h1]) != NULL) { + b = head->table[h1]; + if (b) { for (f = b->ht[h2]; f; f = f->next) if (f->handle == handle) return (unsigned long)f; @@ -251,7 +252,7 @@ static int route4_init(struct tcf_proto *tp) return 0; } -static inline void +static void route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f) { tcf_unbind_filter(tp, &f->res); @@ -267,11 +268,12 @@ static void route4_destroy(struct tcf_proto *tp) if (head == NULL) return; - for (h1=0; h1<=256; h1++) { + for (h1 = 0; h1 <= 256; h1++) { struct route4_bucket *b; - if ((b = head->table[h1]) != NULL) { - for (h2=0; h2<=32; h2++) { + b = head->table[h1]; + if (b) { + for (h2 = 0; h2 <= 32; h2++) { struct route4_filter *f; while ((f = b->ht[h2]) != NULL) { @@ -287,9 +289,9 @@ static void route4_destroy(struct tcf_proto *tp) static int route4_delete(struct tcf_proto *tp, unsigned long arg) { - struct route4_head *head = (struct route4_head*)tp->root; - struct route4_filter **fp, *f = (struct route4_filter*)arg; - unsigned h = 0; + struct route4_head *head = (struct route4_head *)tp->root; + struct route4_filter **fp, *f = (struct route4_filter *)arg; + unsigned int h = 0; struct route4_bucket *b; int i; @@ -299,7 +301,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg) h = f->handle; b = f->bkt; - for (fp = &b->ht[from_hash(h>>16)]; *fp; fp = &(*fp)->next) { + for (fp = &b->ht[from_hash(h >> 16)]; *fp; fp = &(*fp)->next) { if (*fp == f) { tcf_tree_lock(tp); *fp = f->next; @@ -310,7 +312,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg) /* Strip tree */ - for (i=0; i<=32; i++) + for (i = 0; i <= 32; i++) if (b->ht[i]) return 0; @@ -380,7 +382,8 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base, } h1 = to_hash(nhandle); - if ((b = head->table[h1]) == NULL) { + b = head->table[h1]; + if (!b) { err = -ENOBUFS; b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL); if (b == 
NULL) @@ -391,6 +394,7 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base, tcf_tree_unlock(tp); } else { unsigned int h2 = from_hash(nhandle >> 16); + err = -EEXIST; for (fp = b->ht[h2]; fp; fp = fp->next) if (fp->handle == f->handle) @@ -444,7 +448,8 @@ static int route4_change(struct tcf_proto *tp, unsigned long base, if (err < 0) return err; - if ((f = (struct route4_filter*)*arg) != NULL) { + f = (struct route4_filter *)*arg; + if (f) { if (f->handle != handle && handle) return -EINVAL; @@ -481,7 +486,7 @@ static int route4_change(struct tcf_proto *tp, unsigned long base, reinsert: h = from_hash(f->handle >> 16); - for (fp = &f->bkt->ht[h]; (f1=*fp) != NULL; fp = &f1->next) + for (fp = &f->bkt->ht[h]; (f1 = *fp) != NULL; fp = &f1->next) if (f->handle < f1->handle) break; @@ -492,7 +497,8 @@ reinsert: if (old_handle && f->handle != old_handle) { th = to_hash(old_handle); h = from_hash(old_handle >> 16); - if ((b = head->table[th]) != NULL) { + b = head->table[th]; + if (b) { for (fp = &b->ht[h]; *fp; fp = &(*fp)->next) { if (*fp == f) { *fp = f->next; @@ -515,7 +521,7 @@ errout: static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) { struct route4_head *head = tp->root; - unsigned h, h1; + unsigned int h, h1; if (head == NULL) arg->stop = 1; @@ -549,7 +555,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) static int route4_dump(struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { - struct route4_filter *f = (struct route4_filter*)fh; + struct route4_filter *f = (struct route4_filter *)fh; unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; u32 id; @@ -563,15 +569,15 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh, if (nest == NULL) goto nla_put_failure; - if (!(f->handle&0x8000)) { - id = f->id&0xFF; + if (!(f->handle & 0x8000)) { + id = f->id & 0xFF; NLA_PUT_U32(skb, TCA_ROUTE4_TO, id); } - if (f->handle&0x80000000) { - if ((f->handle>>16) != 
0xFFFF) + if (f->handle & 0x80000000) { + if ((f->handle >> 16) != 0xFFFF) NLA_PUT_U32(skb, TCA_ROUTE4_IIF, f->iif); } else { - id = f->id>>16; + id = f->id >> 16; NLA_PUT_U32(skb, TCA_ROUTE4_FROM, id); } if (f->res.classid) diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 425a179..402c44b 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -66,28 +66,25 @@ powerful classification engine. */ -struct rsvp_head -{ +struct rsvp_head { u32 tmap[256/32]; u32 hgenerator; u8 tgenerator; struct rsvp_session *ht[256]; }; -struct rsvp_session -{ +struct rsvp_session { struct rsvp_session *next; __be32 dst[RSVP_DST_LEN]; struct tc_rsvp_gpi dpi; u8 protocol; u8 tunnelid; /* 16 (src,sport) hash slots, and one wildcard source slot */ - struct rsvp_filter *ht[16+1]; + struct rsvp_filter *ht[16 + 1]; }; -struct rsvp_filter -{ +struct rsvp_filter { struct rsvp_filter *next; __be32 src[RSVP_DST_LEN]; struct tc_rsvp_gpi spi; @@ -100,17 +97,19 @@ struct rsvp_filter struct rsvp_session *sess; }; -static __inline__ unsigned hash_dst(__be32 *dst, u8 protocol, u8 tunnelid) +static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid) { - unsigned h = (__force __u32)dst[RSVP_DST_LEN-1]; + unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1]; + h ^= h>>16; h ^= h>>8; return (h ^ protocol ^ tunnelid) & 0xFF; } -static __inline__ unsigned hash_src(__be32 *src) +static inline unsigned int hash_src(__be32 *src) { - unsigned h = (__force __u32)src[RSVP_DST_LEN-1]; + unsigned int h = (__force __u32)src[RSVP_DST_LEN-1]; + h ^= h>>16; h ^= h>>8; h ^= h>>4; @@ -134,10 +133,10 @@ static struct tcf_ext_map rsvp_ext_map = { static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res) { - struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht; + struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht; struct rsvp_session *s; struct rsvp_filter *f; - unsigned h1, h2; + unsigned int h1, h2; __be32 *dst, *src; u8 
protocol; u8 tunnelid = 0; @@ -162,13 +161,13 @@ restart: src = &nhptr->saddr.s6_addr32[0]; dst = &nhptr->daddr.s6_addr32[0]; protocol = nhptr->nexthdr; - xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr); + xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr); #else src = &nhptr->saddr; dst = &nhptr->daddr; protocol = nhptr->protocol; - xprt = ((u8*)nhptr) + (nhptr->ihl<<2); - if (nhptr->frag_off & htons(IP_MF|IP_OFFSET)) + xprt = ((u8 *)nhptr) + (nhptr->ihl<<2); + if (nhptr->frag_off & htons(IP_MF | IP_OFFSET)) return -1; #endif @@ -176,10 +175,10 @@ restart: h2 = hash_src(src); for (s = sht[h1]; s; s = s->next) { - if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] && + if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] && protocol == s->protocol && !(s->dpi.mask & - (*(u32*)(xprt+s->dpi.offset)^s->dpi.key)) && + (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) && #if RSVP_DST_LEN == 4 dst[0] == s->dst[0] && dst[1] == s->dst[1] && @@ -188,8 +187,8 @@ restart: tunnelid == s->tunnelid) { for (f = s->ht[h2]; f; f = f->next) { - if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] && - !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key)) + if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] && + !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key)) #if RSVP_DST_LEN == 4 && src[0] == f->src[0] && @@ -205,7 +204,7 @@ matched: return 0; tunnelid = f->res.classid; - nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr)); + nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr)); goto restart; } } @@ -224,11 +223,11 @@ matched: static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle) { - struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht; + struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht; struct rsvp_session *s; struct rsvp_filter *f; - unsigned h1 = handle&0xFF; - unsigned h2 = (handle>>8)&0xFF; + unsigned int h1 = handle & 0xFF; + unsigned int h2 = (handle >> 8) & 0xFF; if (h2 > 16) return 0; @@ -258,7 +257,7 @@ static int rsvp_init(struct 
tcf_proto *tp) return -ENOBUFS; } -static inline void +static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) { tcf_unbind_filter(tp, &f->res); @@ -277,13 +276,13 @@ static void rsvp_destroy(struct tcf_proto *tp) sht = data->ht; - for (h1=0; h1<256; h1++) { + for (h1 = 0; h1 < 256; h1++) { struct rsvp_session *s; while ((s = sht[h1]) != NULL) { sht[h1] = s->next; - for (h2=0; h2<=16; h2++) { + for (h2 = 0; h2 <= 16; h2++) { struct rsvp_filter *f; while ((f = s->ht[h2]) != NULL) { @@ -299,13 +298,13 @@ static void rsvp_destroy(struct tcf_proto *tp) static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) { - struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg; - unsigned h = f->handle; + struct rsvp_filter **fp, *f = (struct rsvp_filter *)arg; + unsigned int h = f->handle; struct rsvp_session **sp; struct rsvp_session *s = f->sess; int i; - for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) { + for (fp = &s->ht[(h >> 8) & 0xFF]; *fp; fp = &(*fp)->next) { if (*fp == f) { tcf_tree_lock(tp); *fp = f->next; @@ -314,12 +313,12 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) /* Strip tree */ - for (i=0; i<=16; i++) + for (i = 0; i <= 16; i++) if (s->ht[i]) return 0; /* OK, session has no flows */ - for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF]; + for (sp = &((struct rsvp_head *)tp->root)->ht[h & 0xFF]; *sp; sp = &(*sp)->next) { if (*sp == s) { tcf_tree_lock(tp); @@ -337,13 +336,14 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) return 0; } -static unsigned gen_handle(struct tcf_proto *tp, unsigned salt) +static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt) { struct rsvp_head *data = tp->root; int i = 0xFFFF; while (i-- > 0) { u32 h; + if ((data->hgenerator += 0x10000) == 0) data->hgenerator = 0x10000; h = data->hgenerator|salt; @@ -355,10 +355,10 @@ static unsigned gen_handle(struct tcf_proto *tp, unsigned salt) static int tunnel_bts(struct rsvp_head *data) { - int n = 
data->tgenerator>>5; - u32 b = 1<<(data->tgenerator&0x1F); + int n = data->tgenerator >> 5; + u32 b = 1 << (data->tgenerator & 0x1F); - if (data->tmap[n]&b) + if (data->tmap[n] & b) return 0; data->tmap[n] |= b; return 1; @@ -372,10 +372,10 @@ static void tunnel_recycle(struct rsvp_head *data) memset(tmap, 0, sizeof(tmap)); - for (h1=0; h1<256; h1++) { + for (h1 = 0; h1 < 256; h1++) { struct rsvp_session *s; for (s = sht[h1]; s; s = s->next) { - for (h2=0; h2<=16; h2++) { + for (h2 = 0; h2 <= 16; h2++) { struct rsvp_filter *f; for (f = s->ht[h2]; f; f = f->next) { @@ -395,8 +395,8 @@ static u32 gen_tunnel(struct rsvp_head *data) { int i, k; - for (k=0; k<2; k++) { - for (i=255; i>0; i--) { + for (k = 0; k < 2; k++) { + for (i = 255; i > 0; i--) { if (++data->tgenerator == 0) data->tgenerator = 1; if (tunnel_bts(data)) @@ -428,7 +428,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, struct nlattr *opt = tca[TCA_OPTIONS-1]; struct nlattr *tb[TCA_RSVP_MAX + 1]; struct tcf_exts e; - unsigned h1, h2; + unsigned int h1, h2; __be32 *dst; int err; @@ -443,7 +443,8 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, if (err < 0) return err; - if ((f = (struct rsvp_filter*)*arg) != NULL) { + f = (struct rsvp_filter *)*arg; + if (f) { /* Node exists: adjust only classid */ if (f->handle != handle && handle) @@ -500,7 +501,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, goto errout; } - for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) { + for (sp = &data->ht[h1]; (s = *sp) != NULL; sp = &s->next) { if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] && pinfo && pinfo->protocol == s->protocol && memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 && @@ -523,7 +524,7 @@ insert: tcf_exts_change(tp, &f->exts, &e); for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next) - if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask) + if (((*fp)->spi.mask & f->spi.mask) != f->spi.mask) break; f->next = *fp; wmb(); @@ -567,7 +568,7 @@ errout2: 
static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) { struct rsvp_head *head = tp->root; - unsigned h, h1; + unsigned int h, h1; if (arg->stop) return; @@ -598,7 +599,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) static int rsvp_dump(struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { - struct rsvp_filter *f = (struct rsvp_filter*)fh; + struct rsvp_filter *f = (struct rsvp_filter *)fh; struct rsvp_session *s; unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; @@ -624,7 +625,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh, NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo); if (f->res.classid) NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid); - if (((f->handle>>8)&0xFF) != 16) + if (((f->handle >> 8) & 0xFF) != 16) NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src); if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 20ef330..36667fa 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -249,7 +249,7 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle, * of the hashing index is below the threshold. 
*/ if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD) - cp.hash = (cp.mask >> cp.shift)+1; + cp.hash = (cp.mask >> cp.shift) + 1; else cp.hash = DEFAULT_HASH_SIZE; } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index b0c2a82..966920c 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -42,8 +42,7 @@ #include #include -struct tc_u_knode -{ +struct tc_u_knode { struct tc_u_knode *next; u32 handle; struct tc_u_hnode *ht_up; @@ -63,19 +62,17 @@ struct tc_u_knode struct tc_u32_sel sel; }; -struct tc_u_hnode -{ +struct tc_u_hnode { struct tc_u_hnode *next; u32 handle; u32 prio; struct tc_u_common *tp_c; int refcnt; - unsigned divisor; + unsigned int divisor; struct tc_u_knode *ht[1]; }; -struct tc_u_common -{ +struct tc_u_common { struct tc_u_hnode *hlist; struct Qdisc *q; int refcnt; @@ -87,9 +84,11 @@ static const struct tcf_ext_map u32_ext_map = { .police = TCA_U32_POLICE }; -static __inline__ unsigned u32_hash_fold(__be32 key, struct tc_u32_sel *sel, u8 fshift) +static inline unsigned int u32_hash_fold(__be32 key, + const struct tc_u32_sel *sel, + u8 fshift) { - unsigned h = ntohl(key & sel->hmask)>>fshift; + unsigned int h = ntohl(key & sel->hmask) >> fshift; return h; } @@ -101,7 +100,7 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re unsigned int off; } stack[TC_U32_MAXDEPTH]; - struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root; + struct tc_u_hnode *ht = (struct tc_u_hnode *)tp->root; unsigned int off = skb_network_offset(skb); struct tc_u_knode *n; int sdepth = 0; @@ -120,7 +119,7 @@ next_knode: struct tc_u32_key *key = n->sel.keys; #ifdef CONFIG_CLS_U32_PERF - n->pf->rcnt +=1; + n->pf->rcnt += 1; j = 0; #endif @@ -133,7 +132,7 @@ next_knode: } #endif - for (i = n->sel.nkeys; i>0; i--, key++) { + for (i = n->sel.nkeys; i > 0; i--, key++) { int toff = off + key->off + (off2 & key->offmask); __be32 *data, _data; @@ -148,13 +147,13 @@ next_knode: goto next_knode; } #ifdef CONFIG_CLS_U32_PERF - 
n->pf->kcnts[j] +=1; + n->pf->kcnts[j] += 1; j++; #endif } if (n->ht_down == NULL) { check_terminal: - if (n->sel.flags&TC_U32_TERMINAL) { + if (n->sel.flags & TC_U32_TERMINAL) { *res = n->res; #ifdef CONFIG_NET_CLS_IND @@ -164,7 +163,7 @@ check_terminal: } #endif #ifdef CONFIG_CLS_U32_PERF - n->pf->rhit +=1; + n->pf->rhit += 1; #endif r = tcf_exts_exec(skb, &n->exts, res); if (r < 0) { @@ -197,10 +196,10 @@ check_terminal: sel = ht->divisor & u32_hash_fold(*data, &n->sel, n->fshift); } - if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT))) + if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT))) goto next_ht; - if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) { + if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) { off2 = n->sel.off + 3; if (n->sel.flags & TC_U32_VAROFFSET) { __be16 *data, _data; @@ -215,7 +214,7 @@ check_terminal: } off2 &= ~3; } - if (n->sel.flags&TC_U32_EAT) { + if (n->sel.flags & TC_U32_EAT) { off += off2; off2 = 0; } @@ -236,11 +235,11 @@ out: deadloop: if (net_ratelimit()) - printk(KERN_WARNING "cls_u32: dead loop\n"); + pr_warning("cls_u32: dead loop\n"); return -1; } -static __inline__ struct tc_u_hnode * +static struct tc_u_hnode * u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) { struct tc_u_hnode *ht; @@ -252,10 +251,10 @@ u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) return ht; } -static __inline__ struct tc_u_knode * +static struct tc_u_knode * u32_lookup_key(struct tc_u_hnode *ht, u32 handle) { - unsigned sel; + unsigned int sel; struct tc_u_knode *n = NULL; sel = TC_U32_HASH(handle); @@ -300,7 +299,7 @@ static u32 gen_new_htid(struct tc_u_common *tp_c) do { if (++tp_c->hgenerator == 0x7FF) tp_c->hgenerator = 1; - } while (--i>0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20)); + } while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20)); return i > 0 ? 
(tp_c->hgenerator|0x800)<<20 : 0; } @@ -378,9 +377,9 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key) static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) { struct tc_u_knode *n; - unsigned h; + unsigned int h; - for (h=0; h<=ht->divisor; h++) { + for (h = 0; h <= ht->divisor; h++) { while ((n = ht->ht[h]) != NULL) { ht->ht[h] = n->next; @@ -446,13 +445,13 @@ static void u32_destroy(struct tcf_proto *tp) static int u32_delete(struct tcf_proto *tp, unsigned long arg) { - struct tc_u_hnode *ht = (struct tc_u_hnode*)arg; + struct tc_u_hnode *ht = (struct tc_u_hnode *)arg; if (ht == NULL) return 0; if (TC_U32_KEY(ht->handle)) - return u32_delete_key(tp, (struct tc_u_knode*)ht); + return u32_delete_key(tp, (struct tc_u_knode *)ht); if (tp->root == ht) return -EINVAL; @@ -470,14 +469,14 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg) static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle) { struct tc_u_knode *n; - unsigned i = 0x7FF; + unsigned int i = 0x7FF; - for (n=ht->ht[TC_U32_HASH(handle)]; n; n = n->next) + for (n = ht->ht[TC_U32_HASH(handle)]; n; n = n->next) if (i < TC_U32_NODE(n->handle)) i = TC_U32_NODE(n->handle); i++; - return handle|(i>0xFFF ? 0xFFF : i); + return handle | (i > 0xFFF ? 
0xFFF : i); } static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { @@ -566,7 +565,8 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, if (err < 0) return err; - if ((n = (struct tc_u_knode*)*arg) != NULL) { + n = (struct tc_u_knode *)*arg; + if (n) { if (TC_U32_KEY(n->handle) == 0) return -EINVAL; @@ -574,7 +574,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, } if (tb[TCA_U32_DIVISOR]) { - unsigned divisor = nla_get_u32(tb[TCA_U32_DIVISOR]); + unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]); if (--divisor > 0x100) return -EINVAL; @@ -585,7 +585,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, if (handle == 0) return -ENOMEM; } - ht = kzalloc(sizeof(*ht) + divisor*sizeof(void*), GFP_KERNEL); + ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL); if (ht == NULL) return -ENOBUFS; ht->tp_c = tp_c; @@ -683,7 +683,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *ht; struct tc_u_knode *n; - unsigned h; + unsigned int h; if (arg->stop) return; @@ -717,7 +717,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) static int u32_dump(struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { - struct tc_u_knode *n = (struct tc_u_knode*)fh; + struct tc_u_knode *n = (struct tc_u_knode *)fh; struct nlattr *nest; if (n == NULL) @@ -730,8 +730,9 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh, goto nla_put_failure; if (TC_U32_KEY(n->handle) == 0) { - struct tc_u_hnode *ht = (struct tc_u_hnode*)fh; - u32 divisor = ht->divisor+1; + struct tc_u_hnode *ht = (struct tc_u_hnode *)fh; + u32 divisor = ht->divisor + 1; + NLA_PUT_U32(skb, TCA_U32_DIVISOR, divisor); } else { NLA_PUT(skb, TCA_U32_SEL, @@ -755,7 +756,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh, goto nla_put_failure; #ifdef CONFIG_NET_CLS_IND - 
if(strlen(n->indev)) + if (strlen(n->indev)) NLA_PUT_STRING(skb, TCA_U32_INDEV, n->indev); #endif #ifdef CONFIG_CLS_U32_PERF diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c index bc45039..1c8360a 100644 --- a/net/sched/em_cmp.c +++ b/net/sched/em_cmp.c @@ -33,40 +33,41 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em, return 0; switch (cmp->align) { - case TCF_EM_ALIGN_U8: - val = *ptr; - break; + case TCF_EM_ALIGN_U8: + val = *ptr; + break; - case TCF_EM_ALIGN_U16: - val = get_unaligned_be16(ptr); + case TCF_EM_ALIGN_U16: + val = get_unaligned_be16(ptr); - if (cmp_needs_transformation(cmp)) - val = be16_to_cpu(val); - break; + if (cmp_needs_transformation(cmp)) + val = be16_to_cpu(val); + break; - case TCF_EM_ALIGN_U32: - /* Worth checking boundries? The branching seems - * to get worse. Visit again. */ - val = get_unaligned_be32(ptr); + case TCF_EM_ALIGN_U32: + /* Worth checking boundries? The branching seems + * to get worse. Visit again. + */ + val = get_unaligned_be32(ptr); - if (cmp_needs_transformation(cmp)) - val = be32_to_cpu(val); - break; + if (cmp_needs_transformation(cmp)) + val = be32_to_cpu(val); + break; - default: - return 0; + default: + return 0; } if (cmp->mask) val &= cmp->mask; switch (cmp->opnd) { - case TCF_EM_OPND_EQ: - return val == cmp->val; - case TCF_EM_OPND_LT: - return val < cmp->val; - case TCF_EM_OPND_GT: - return val > cmp->val; + case TCF_EM_OPND_EQ: + return val == cmp->val; + case TCF_EM_OPND_LT: + return val < cmp->val; + case TCF_EM_OPND_GT: + return val > cmp->val; } return 0; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 34da5e2..7af1f65 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -73,21 +73,18 @@ #include #include -struct meta_obj -{ +struct meta_obj { unsigned long value; unsigned int len; }; -struct meta_value -{ +struct meta_value { struct tcf_meta_val hdr; unsigned long val; unsigned int len; }; -struct meta_match -{ +struct meta_match { struct meta_value 
lvalue; struct meta_value rvalue; }; @@ -483,8 +480,7 @@ META_COLLECTOR(int_sk_write_pend) * Meta value collectors assignment table **************************************************************************/ -struct meta_ops -{ +struct meta_ops { void (*get)(struct sk_buff *, struct tcf_pkt_info *, struct meta_value *, struct meta_obj *, int *); }; @@ -494,7 +490,7 @@ struct meta_ops /* Meta value operations table listing all meta value collectors and * assigns them to a type and meta id. */ -static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { +static struct meta_ops __meta_ops[TCF_META_TYPE_MAX + 1][TCF_META_ID_MAX + 1] = { [TCF_META_TYPE_VAR] = { [META_ID(DEV)] = META_FUNC(var_dev), [META_ID(SK_BOUND_IF)] = META_FUNC(var_sk_bound_if), @@ -550,7 +546,7 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { } }; -static inline struct meta_ops * meta_ops(struct meta_value *val) +static inline struct meta_ops *meta_ops(struct meta_value *val) { return &__meta_ops[meta_type(val)][meta_id(val)]; } @@ -649,9 +645,8 @@ static int meta_int_dump(struct sk_buff *skb, struct meta_value *v, int tlv) { if (v->len == sizeof(unsigned long)) NLA_PUT(skb, tlv, sizeof(unsigned long), &v->val); - else if (v->len == sizeof(u32)) { + else if (v->len == sizeof(u32)) NLA_PUT_U32(skb, tlv, v->val); - } return 0; @@ -663,8 +658,7 @@ nla_put_failure: * Type specific operations table **************************************************************************/ -struct meta_type_ops -{ +struct meta_type_ops { void (*destroy)(struct meta_value *); int (*compare)(struct meta_obj *, struct meta_obj *); int (*change)(struct meta_value *, struct nlattr *); @@ -672,7 +666,7 @@ struct meta_type_ops int (*dump)(struct sk_buff *, struct meta_value *, int); }; -static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = { +static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX + 1] = { [TCF_META_TYPE_VAR] = { .destroy = meta_var_destroy, 
.compare = meta_var_compare, @@ -688,7 +682,7 @@ static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = { } }; -static inline struct meta_type_ops * meta_type_ops(struct meta_value *v) +static inline struct meta_type_ops *meta_type_ops(struct meta_value *v) { return &__meta_type_ops[meta_type(v)]; } @@ -713,7 +707,7 @@ static int meta_get(struct sk_buff *skb, struct tcf_pkt_info *info, return err; if (meta_type_ops(v)->apply_extras) - meta_type_ops(v)->apply_extras(v, dst); + meta_type_ops(v)->apply_extras(v, dst); return 0; } @@ -732,12 +726,12 @@ static int em_meta_match(struct sk_buff *skb, struct tcf_ematch *m, r = meta_type_ops(&meta->lvalue)->compare(&l_value, &r_value); switch (meta->lvalue.hdr.op) { - case TCF_EM_OPND_EQ: - return !r; - case TCF_EM_OPND_LT: - return r < 0; - case TCF_EM_OPND_GT: - return r > 0; + case TCF_EM_OPND_EQ: + return !r; + case TCF_EM_OPND_LT: + return r < 0; + case TCF_EM_OPND_GT: + return r > 0; } return 0; @@ -771,7 +765,7 @@ static inline int meta_change_data(struct meta_value *dst, struct nlattr *nla) static inline int meta_is_supported(struct meta_value *val) { - return (!meta_id(val) || meta_ops(val)->get); + return !meta_id(val) || meta_ops(val)->get; } static const struct nla_policy meta_policy[TCA_EM_META_MAX + 1] = { diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c index 1a4176a..a3bed07 100644 --- a/net/sched/em_nbyte.c +++ b/net/sched/em_nbyte.c @@ -18,8 +18,7 @@ #include #include -struct nbyte_data -{ +struct nbyte_data { struct tcf_em_nbyte hdr; char pattern[0]; }; diff --git a/net/sched/em_text.c b/net/sched/em_text.c index ea8f566..15d353d 100644 --- a/net/sched/em_text.c +++ b/net/sched/em_text.c @@ -19,8 +19,7 @@ #include #include -struct text_match -{ +struct text_match { u16 from_offset; u16 to_offset; u8 from_layer; diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c index 953f147..797bdb8 100644 --- a/net/sched/em_u32.c +++ b/net/sched/em_u32.c @@ -35,7 +35,7 @@ static int 
em_u32_match(struct sk_buff *skb, struct tcf_ematch *em, if (!tcf_valid_offset(skb, ptr, sizeof(u32))) return 0; - return !(((*(__be32*) ptr) ^ key->val) & key->mask); + return !(((*(__be32 *) ptr) ^ key->val) & key->mask); } static struct tcf_ematch_ops em_u32_ops = { diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 5e37da9..88d93eb 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -93,7 +93,7 @@ static LIST_HEAD(ematch_ops); static DEFINE_RWLOCK(ematch_mod_lock); -static inline struct tcf_ematch_ops * tcf_em_lookup(u16 kind) +static struct tcf_ematch_ops *tcf_em_lookup(u16 kind) { struct tcf_ematch_ops *e = NULL; @@ -163,8 +163,8 @@ void tcf_em_unregister(struct tcf_ematch_ops *ops) } EXPORT_SYMBOL(tcf_em_unregister); -static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree, - int index) +static inline struct tcf_ematch *tcf_em_get_match(struct tcf_ematch_tree *tree, + int index) { return &tree->matches[index]; } @@ -184,7 +184,8 @@ static int tcf_em_validate(struct tcf_proto *tp, if (em_hdr->kind == TCF_EM_CONTAINER) { /* Special ematch called "container", carries an index - * referencing an external ematch sequence. */ + * referencing an external ematch sequence. + */ u32 ref; if (data_len < sizeof(ref)) @@ -195,7 +196,8 @@ static int tcf_em_validate(struct tcf_proto *tp, goto errout; /* We do not allow backward jumps to avoid loops and jumps - * to our own position are of course illegal. */ + * to our own position are of course illegal. + */ if (ref <= idx) goto errout; @@ -208,7 +210,8 @@ static int tcf_em_validate(struct tcf_proto *tp, * which automatically releases the reference again, therefore * the module MUST not be given back under any circumstances * here. Be aware, the destroy function assumes that the - * module is held if the ops field is non zero. */ + * module is held if the ops field is non zero. 
+ */ em->ops = tcf_em_lookup(em_hdr->kind); if (em->ops == NULL) { @@ -221,7 +224,8 @@ static int tcf_em_validate(struct tcf_proto *tp, if (em->ops) { /* We dropped the RTNL mutex in order to * perform the module load. Tell the caller - * to replay the request. */ + * to replay the request. + */ module_put(em->ops->owner); err = -EAGAIN; } @@ -230,7 +234,8 @@ static int tcf_em_validate(struct tcf_proto *tp, } /* ematch module provides expected length of data, so we - * can do a basic sanity check. */ + * can do a basic sanity check. + */ if (em->ops->datalen && data_len < em->ops->datalen) goto errout; @@ -246,7 +251,8 @@ static int tcf_em_validate(struct tcf_proto *tp, * TCF_EM_SIMPLE may be specified stating that the * data only consists of a u32 integer and the module * does not expected a memory reference but rather - * the value carried. */ + * the value carried. + */ if (em_hdr->flags & TCF_EM_SIMPLE) { if (data_len < sizeof(u32)) goto errout; @@ -334,7 +340,8 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla, * The array of rt attributes is parsed in the order as they are * provided, their type must be incremental from 1 to n. Even * if it does not serve any real purpose, a failure of sticking - * to this policy will result in parsing failure. */ + * to this policy will result in parsing failure. + */ for (idx = 0; nla_ok(rt_match, list_len); idx++) { err = -EINVAL; @@ -359,7 +366,8 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla, /* Check if the number of matches provided by userspace actually * complies with the array of matches. The number was used for * the validation of references and a mismatch could lead to - * undefined references during the matching process. */ + * undefined references during the matching process. 
+ */ if (idx != tree_hdr->nmatches) { err = -EINVAL; goto errout_abort; @@ -449,7 +457,7 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv) .flags = em->flags }; - NLA_PUT(skb, i+1, sizeof(em_hdr), &em_hdr); + NLA_PUT(skb, i + 1, sizeof(em_hdr), &em_hdr); if (em->ops && em->ops->dump) { if (em->ops->dump(skb, em) < 0) @@ -478,6 +486,7 @@ static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em, struct tcf_pkt_info *info) { int r = em->ops->match(skb, em, info); + return tcf_em_is_inverted(em) ? !r : r; } @@ -527,8 +536,8 @@ pop_stack: stack_overflow: if (net_ratelimit()) - printk(KERN_WARNING "tc ematch: local stack overflow," - " increase NET_EMATCH_STACK\n"); + pr_warning("tc ematch: local stack overflow," + " increase NET_EMATCH_STACK\n"); return -1; } EXPORT_SYMBOL(__tcf_em_tree_match); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index b22ca2d..36ac0ec 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -187,7 +187,7 @@ int unregister_qdisc(struct Qdisc_ops *qops) int err = -ENOENT; write_lock(&qdisc_mod_lock); - for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next) + for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next) if (q == qops) break; if (q) { @@ -321,7 +321,9 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab) if (!tab || --tab->refcnt) return; - for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) { + for (rtabp = &qdisc_rtab_list; + (rtab = *rtabp) != NULL; + rtabp = &rtab->next) { if (rtab == tab) { *rtabp = rtab->next; kfree(rtab); @@ -459,9 +461,8 @@ EXPORT_SYMBOL(qdisc_calculate_pkt_len); void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc) { if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { - printk(KERN_WARNING - "%s: %s qdisc %X: is non-work-conserving?\n", - txt, qdisc->ops->id, qdisc->handle >> 16); + pr_warn("%s: %s qdisc %X: is non-work-conserving?\n", + txt, qdisc->ops->id, qdisc->handle >> 16); qdisc->flags |= TCQ_F_WARN_NONWC; } } @@ -625,7 +626,7 
@@ static u32 qdisc_alloc_handle(struct net_device *dev) autohandle = TC_H_MAKE(0x80000000U, 0); } while (qdisc_lookup(dev, autohandle) && --i > 0); - return i>0 ? autohandle : 0; + return i > 0 ? autohandle : 0; } void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) @@ -915,9 +916,8 @@ out: return 0; } -struct check_loop_arg -{ - struct qdisc_walker w; +struct check_loop_arg { + struct qdisc_walker w; struct Qdisc *p; int depth; }; @@ -970,7 +970,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) struct Qdisc *p = NULL; int err; - if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) + dev = __dev_get_by_index(net, tcm->tcm_ifindex); + if (!dev) return -ENODEV; err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); @@ -980,12 +981,12 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if (clid) { if (clid != TC_H_ROOT) { if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) { - if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL) + p = qdisc_lookup(dev, TC_H_MAJ(clid)); + if (!p) return -ENOENT; q = qdisc_leaf(p, clid); - } else { /* ingress */ - if (dev_ingress_queue(dev)) - q = dev_ingress_queue(dev)->qdisc_sleeping; + } else if (dev_ingress_queue(dev)) { + q = dev_ingress_queue(dev)->qdisc_sleeping; } } else { q = dev->qdisc; @@ -996,7 +997,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if (tcm->tcm_handle && q->handle != tcm->tcm_handle) return -EINVAL; } else { - if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL) + q = qdisc_lookup(dev, tcm->tcm_handle); + if (!q) return -ENOENT; } @@ -1008,7 +1010,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) return -EINVAL; if (q->handle == 0) return -ENOENT; - if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0) + err = qdisc_graft(dev, p, skb, n, clid, NULL, q); + if (err != 0) return err; } else { qdisc_notify(net, skb, n, clid, NULL, q); @@ -1017,7 +1020,7 @@ static int 
tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) } /* - Create/change qdisc. + * Create/change qdisc. */ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) @@ -1036,7 +1039,8 @@ replay: clid = tcm->tcm_parent; q = p = NULL; - if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) + dev = __dev_get_by_index(net, tcm->tcm_ifindex); + if (!dev) return -ENODEV; err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); @@ -1046,12 +1050,12 @@ replay: if (clid) { if (clid != TC_H_ROOT) { if (clid != TC_H_INGRESS) { - if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL) + p = qdisc_lookup(dev, TC_H_MAJ(clid)); + if (!p) return -ENOENT; q = qdisc_leaf(p, clid); - } else { /* ingress */ - if (dev_ingress_queue_create(dev)) - q = dev_ingress_queue(dev)->qdisc_sleeping; + } else if (dev_ingress_queue_create(dev)) { + q = dev_ingress_queue(dev)->qdisc_sleeping; } } else { q = dev->qdisc; @@ -1063,13 +1067,14 @@ replay: if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) { if (tcm->tcm_handle) { - if (q && !(n->nlmsg_flags&NLM_F_REPLACE)) + if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) return -EEXIST; if (TC_H_MIN(tcm->tcm_handle)) return -EINVAL; - if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL) + q = qdisc_lookup(dev, tcm->tcm_handle); + if (!q) goto create_n_graft; - if (n->nlmsg_flags&NLM_F_EXCL) + if (n->nlmsg_flags & NLM_F_EXCL) return -EEXIST; if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) return -EINVAL; @@ -1079,7 +1084,7 @@ replay: atomic_inc(&q->refcnt); goto graft; } else { - if (q == NULL) + if (!q) goto create_n_graft; /* This magic test requires explanation. @@ -1101,9 +1106,9 @@ replay: * For now we select create/graft, if * user gave KIND, which does not match existing. 
*/ - if ((n->nlmsg_flags&NLM_F_CREATE) && - (n->nlmsg_flags&NLM_F_REPLACE) && - ((n->nlmsg_flags&NLM_F_EXCL) || + if ((n->nlmsg_flags & NLM_F_CREATE) && + (n->nlmsg_flags & NLM_F_REPLACE) && + ((n->nlmsg_flags & NLM_F_EXCL) || (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)))) goto create_n_graft; @@ -1118,7 +1123,7 @@ replay: /* Change qdisc parameters */ if (q == NULL) return -ENOENT; - if (n->nlmsg_flags&NLM_F_EXCL) + if (n->nlmsg_flags & NLM_F_EXCL) return -EEXIST; if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) return -EINVAL; @@ -1128,7 +1133,7 @@ replay: return err; create_n_graft: - if (!(n->nlmsg_flags&NLM_F_CREATE)) + if (!(n->nlmsg_flags & NLM_F_CREATE)) return -ENOENT; if (clid == TC_H_INGRESS) { if (dev_ingress_queue(dev)) @@ -1234,16 +1239,19 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb, return -ENOBUFS; if (old && !tc_qdisc_dump_ignore(old)) { - if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0) + if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, + 0, RTM_DELQDISC) < 0) goto err_out; } if (new && !tc_qdisc_dump_ignore(new)) { - if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) + if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, + old ? 
NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) goto err_out; } if (skb->len) - return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, net, pid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); err_out: kfree_skb(skb); @@ -1275,7 +1283,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, q_idx++; continue; } - if (!tc_qdisc_dump_ignore(q) && + if (!tc_qdisc_dump_ignore(q) && tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) goto done; @@ -1356,7 +1364,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) u32 qid = TC_H_MAJ(clid); int err; - if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) + dev = __dev_get_by_index(net, tcm->tcm_ifindex); + if (!dev) return -ENODEV; err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); @@ -1391,9 +1400,9 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) qid = dev->qdisc->handle; /* Now qid is genuine qdisc handle consistent - both with parent and child. - - TC_H_MAJ(pid) still may be unspecified, complete it now. + * both with parent and child. + * + * TC_H_MAJ(pid) still may be unspecified, complete it now. */ if (pid) pid = TC_H_MAKE(qid, pid); @@ -1403,7 +1412,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) } /* OK. 
Locate qdisc */ - if ((q = qdisc_lookup(dev, qid)) == NULL) + q = qdisc_lookup(dev, qid); + if (!q) return -ENOENT; /* An check that it supports classes */ @@ -1423,13 +1433,14 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if (cl == 0) { err = -ENOENT; - if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE)) + if (n->nlmsg_type != RTM_NEWTCLASS || + !(n->nlmsg_flags & NLM_F_CREATE)) goto out; } else { switch (n->nlmsg_type) { case RTM_NEWTCLASS: err = -EEXIST; - if (n->nlmsg_flags&NLM_F_EXCL) + if (n->nlmsg_flags & NLM_F_EXCL) goto out; break; case RTM_DELTCLASS: @@ -1521,14 +1532,14 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, return -EINVAL; } - return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, net, pid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); } -struct qdisc_dump_args -{ - struct qdisc_walker w; - struct sk_buff *skb; - struct netlink_callback *cb; +struct qdisc_dump_args { + struct qdisc_walker w; + struct sk_buff *skb; + struct netlink_callback *cb; }; static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg) @@ -1590,7 +1601,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) { - struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); + struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh); struct net *net = sock_net(skb->sk); struct netdev_queue *dev_queue; struct net_device *dev; @@ -1598,7 +1609,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) return 0; - if ((dev = dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) + dev = dev_get_by_index(net, tcm->tcm_ifindex); + if (!dev) return 0; s_t = cb->args[0]; @@ -1621,19 +1633,22 @@ done: } /* Main classifier routine: scans classifier chain attached - to this qdisc, 
(optionally) tests for protocol and asks - specific classifiers. + * to this qdisc, (optionally) tests for protocol and asks + * specific classifiers. */ int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res) { __be16 protocol = skb->protocol; - int err = 0; + int err; for (; tp; tp = tp->next) { - if ((tp->protocol == protocol || - tp->protocol == htons(ETH_P_ALL)) && - (err = tp->classify(skb, tp, res)) >= 0) { + if (tp->protocol != protocol && + tp->protocol != htons(ETH_P_ALL)) + continue; + err = tp->classify(skb, tp, res); + + if (err >= 0) { #ifdef CONFIG_NET_CLS_ACT if (err != TC_ACT_RECLASSIFY && skb->tc_verd) skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0); @@ -1664,11 +1679,11 @@ reclassify: if (verd++ >= MAX_REC_LOOP) { if (net_ratelimit()) - printk(KERN_NOTICE - "%s: packet reclassify loop" + pr_notice("%s: packet reclassify loop" " rule prio %u protocol %02x\n", - tp->q->ops->id, - tp->prio & 0xffff, ntohs(tp->protocol)); + tp->q->ops->id, + tp->prio & 0xffff, + ntohs(tp->protocol)); return TC_ACT_SHOT; } skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd); @@ -1761,7 +1776,7 @@ static int __init pktsched_init(void) err = register_pernet_subsys(&psched_net_ops); if (err) { - printk(KERN_ERR "pktsched_init: " + pr_err("pktsched_init: " "cannot initialize per netns operations\n"); return err; } diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 943d733..3f08158 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -319,7 +319,7 @@ static int atm_tc_delete(struct Qdisc *sch, unsigned long arg) * creation), and one for the reference held when calling delete. 
*/ if (flow->ref < 2) { - printk(KERN_ERR "atm_tc_delete: flow->ref == %d\n", flow->ref); + pr_err("atm_tc_delete: flow->ref == %d\n", flow->ref); return -EINVAL; } if (flow->ref > 2) @@ -384,12 +384,12 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) } } flow = NULL; - done: - ; +done: + ; } - if (!flow) + if (!flow) { flow = &p->link; - else { + } else { if (flow->vcc) ATM_SKB(skb)->atm_options = flow->vcc->atm_options; /*@@@ looks good ... but it's not supposed to work :-) */ @@ -576,8 +576,7 @@ static void atm_tc_destroy(struct Qdisc *sch) list_for_each_entry_safe(flow, tmp, &p->flows, list) { if (flow->ref > 1) - printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow, - flow->ref); + pr_err("atm_destroy: %p->ref = %d\n", flow, flow->ref); atm_tc_put(sch, (unsigned long)flow); } tasklet_kill(&p->task); @@ -616,9 +615,8 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, } if (flow->excess) NLA_PUT_U32(skb, TCA_ATM_EXCESS, flow->classid); - else { + else NLA_PUT_U32(skb, TCA_ATM_EXCESS, 0); - } nla_nest_end(skb, nest); return skb->len; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index c80d1c2..4aaf44c 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -72,8 +72,7 @@ struct cbq_sched_data; -struct cbq_class -{ +struct cbq_class { struct Qdisc_class_common common; struct cbq_class *next_alive; /* next class with backlog in this priority band */ @@ -139,19 +138,18 @@ struct cbq_class int refcnt; int filters; - struct cbq_class *defaults[TC_PRIO_MAX+1]; + struct cbq_class *defaults[TC_PRIO_MAX + 1]; }; -struct cbq_sched_data -{ +struct cbq_sched_data { struct Qdisc_class_hash clhash; /* Hash table of all classes */ - int nclasses[TC_CBQ_MAXPRIO+1]; - unsigned quanta[TC_CBQ_MAXPRIO+1]; + int nclasses[TC_CBQ_MAXPRIO + 1]; + unsigned int quanta[TC_CBQ_MAXPRIO + 1]; struct cbq_class link; - unsigned activemask; - struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes + unsigned int activemask; + struct 
cbq_class *active[TC_CBQ_MAXPRIO + 1]; /* List of all classes with backlog */ #ifdef CONFIG_NET_CLS_ACT @@ -162,7 +160,7 @@ struct cbq_sched_data int tx_len; psched_time_t now; /* Cached timestamp */ psched_time_t now_rt; /* Cached real time */ - unsigned pmask; + unsigned int pmask; struct hrtimer delay_timer; struct qdisc_watchdog watchdog; /* Watchdog timer, @@ -175,9 +173,9 @@ struct cbq_sched_data }; -#define L2T(cl,len) qdisc_l2t((cl)->R_tab,len) +#define L2T(cl, len) qdisc_l2t((cl)->R_tab, len) -static __inline__ struct cbq_class * +static inline struct cbq_class * cbq_class_lookup(struct cbq_sched_data *q, u32 classid) { struct Qdisc_class_common *clc; @@ -193,25 +191,27 @@ cbq_class_lookup(struct cbq_sched_data *q, u32 classid) static struct cbq_class * cbq_reclassify(struct sk_buff *skb, struct cbq_class *this) { - struct cbq_class *cl, *new; + struct cbq_class *cl; - for (cl = this->tparent; cl; cl = cl->tparent) - if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this) - return new; + for (cl = this->tparent; cl; cl = cl->tparent) { + struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT]; + if (new != NULL && new != this) + return new; + } return NULL; } #endif /* Classify packet. The procedure is pretty complicated, but - it allows us to combine link sharing and priority scheduling - transparently. - - Namely, you can put link sharing rules (f.e. route based) at root of CBQ, - so that it resolves to split nodes. Then packets are classified - by logical priority, or a more specific classifier may be attached - to the split node. + * it allows us to combine link sharing and priority scheduling + * transparently. + * + * Namely, you can put link sharing rules (f.e. route based) at root of CBQ, + * so that it resolves to split nodes. Then packets are classified + * by logical priority, or a more specific classifier may be attached + * to the split node. 
*/ static struct cbq_class * @@ -227,7 +227,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) /* * Step 1. If skb->priority points to one of our classes, use it. */ - if (TC_H_MAJ(prio^sch->handle) == 0 && + if (TC_H_MAJ(prio ^ sch->handle) == 0 && (cl = cbq_class_lookup(q, prio)) != NULL) return cl; @@ -243,10 +243,11 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) (result = tc_classify_compat(skb, head->filter_list, &res)) < 0) goto fallback; - if ((cl = (void*)res.class) == NULL) { + cl = (void *)res.class; + if (!cl) { if (TC_H_MAJ(res.classid)) cl = cbq_class_lookup(q, res.classid); - else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL) + else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL) cl = defmap[TC_PRIO_BESTEFFORT]; if (cl == NULL || cl->level >= head->level) @@ -282,7 +283,7 @@ fallback: * Step 4. No success... */ if (TC_H_MAJ(prio) == 0 && - !(cl = head->defaults[prio&TC_PRIO_MAX]) && + !(cl = head->defaults[prio & TC_PRIO_MAX]) && !(cl = head->defaults[TC_PRIO_BESTEFFORT])) return head; @@ -290,12 +291,12 @@ fallback: } /* - A packet has just been enqueued on the empty class. - cbq_activate_class adds it to the tail of active class list - of its priority band. + * A packet has just been enqueued on the empty class. + * cbq_activate_class adds it to the tail of active class list + * of its priority band. */ -static __inline__ void cbq_activate_class(struct cbq_class *cl) +static inline void cbq_activate_class(struct cbq_class *cl) { struct cbq_sched_data *q = qdisc_priv(cl->qdisc); int prio = cl->cpriority; @@ -314,9 +315,9 @@ static __inline__ void cbq_activate_class(struct cbq_class *cl) } /* - Unlink class from active chain. - Note that this same procedure is done directly in cbq_dequeue* - during round-robin procedure. + * Unlink class from active chain. + * Note that this same procedure is done directly in cbq_dequeue* + * during round-robin procedure. 
*/ static void cbq_deactivate_class(struct cbq_class *this) @@ -350,7 +351,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl) { int toplevel = q->toplevel; - if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) { + if (toplevel > cl->level && !(cl->q->flags & TCQ_F_THROTTLED)) { psched_time_t now; psched_tdiff_t incr; @@ -363,7 +364,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl) q->toplevel = cl->level; return; } - } while ((cl=cl->borrow) != NULL && toplevel > cl->level); + } while ((cl = cl->borrow) != NULL && toplevel > cl->level); } } @@ -418,11 +419,11 @@ static void cbq_ovl_classic(struct cbq_class *cl) delay += cl->offtime; /* - Class goes to sleep, so that it will have no - chance to work avgidle. Let's forgive it 8) - - BTW cbq-2.0 has a crap in this - place, apparently they forgot to shift it by cl->ewma_log. + * Class goes to sleep, so that it will have no + * chance to work avgidle. Let's forgive it 8) + * + * BTW cbq-2.0 has a crap in this + * place, apparently they forgot to shift it by cl->ewma_log. */ if (cl->avgidle < 0) delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log); @@ -439,8 +440,8 @@ static void cbq_ovl_classic(struct cbq_class *cl) q->wd_expires = delay; /* Dirty work! We must schedule wakeups based on - real available rate, rather than leaf rate, - which may be tiny (even zero). + * real available rate, rather than leaf rate, + * which may be tiny (even zero). 
*/ if (q->toplevel == TC_CBQ_MAXLEVEL) { struct cbq_class *b; @@ -460,7 +461,7 @@ static void cbq_ovl_classic(struct cbq_class *cl) } /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when - they go overlimit + * they go overlimit */ static void cbq_ovl_rclassic(struct cbq_class *cl) @@ -595,7 +596,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) struct Qdisc *sch = q->watchdog.qdisc; psched_time_t now; psched_tdiff_t delay = 0; - unsigned pmask; + unsigned int pmask; now = psched_get_time(); @@ -665,15 +666,15 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) #endif /* - It is mission critical procedure. - - We "regenerate" toplevel cutoff, if transmitting class - has backlog and it is not regulated. It is not part of - original CBQ description, but looks more reasonable. - Probably, it is wrong. This question needs further investigation. -*/ + * It is mission critical procedure. + * + * We "regenerate" toplevel cutoff, if transmitting class + * has backlog and it is not regulated. It is not part of + * original CBQ description, but looks more reasonable. + * Probably, it is wrong. This question needs further investigation. + */ -static __inline__ void +static inline void cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl, struct cbq_class *borrowed) { @@ -684,7 +685,7 @@ cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl, q->toplevel = borrowed->level; return; } - } while ((borrowed=borrowed->borrow) != NULL); + } while ((borrowed = borrowed->borrow) != NULL); } #if 0 /* It is not necessary now. Uncommenting it @@ -712,10 +713,10 @@ cbq_update(struct cbq_sched_data *q) cl->bstats.bytes += len; /* - (now - last) is total time between packet right edges. - (last_pktlen/rate) is "virtual" busy time, so that - - idle = (now - last) - last_pktlen/rate + * (now - last) is total time between packet right edges. 
+ * (last_pktlen/rate) is "virtual" busy time, so that + * + * idle = (now - last) - last_pktlen/rate */ idle = q->now - cl->last; @@ -725,9 +726,9 @@ cbq_update(struct cbq_sched_data *q) idle -= L2T(cl, len); /* true_avgidle := (1-W)*true_avgidle + W*idle, - where W=2^{-ewma_log}. But cl->avgidle is scaled: - cl->avgidle == true_avgidle/W, - hence: + * where W=2^{-ewma_log}. But cl->avgidle is scaled: + * cl->avgidle == true_avgidle/W, + * hence: */ avgidle += idle - (avgidle>>cl->ewma_log); } @@ -741,22 +742,22 @@ cbq_update(struct cbq_sched_data *q) cl->avgidle = avgidle; /* Calculate expected time, when this class - will be allowed to send. - It will occur, when: - (1-W)*true_avgidle + W*delay = 0, i.e. - idle = (1/W - 1)*(-true_avgidle) - or - idle = (1 - W)*(-cl->avgidle); + * will be allowed to send. + * It will occur, when: + * (1-W)*true_avgidle + W*delay = 0, i.e. + * idle = (1/W - 1)*(-true_avgidle) + * or + * idle = (1 - W)*(-cl->avgidle); */ idle = (-avgidle) - ((-avgidle) >> cl->ewma_log); /* - That is not all. - To maintain the rate allocated to the class, - we add to undertime virtual clock, - necessary to complete transmitted packet. - (len/phys_bandwidth has been already passed - to the moment of cbq_update) + * That is not all. + * To maintain the rate allocated to the class, + * we add to undertime virtual clock, + * necessary to complete transmitted packet. + * (len/phys_bandwidth has been already passed + * to the moment of cbq_update) */ idle -= L2T(&q->link, len); @@ -778,7 +779,7 @@ cbq_update(struct cbq_sched_data *q) cbq_update_toplevel(q, this, q->tx_borrowed); } -static __inline__ struct cbq_class * +static inline struct cbq_class * cbq_under_limit(struct cbq_class *cl) { struct cbq_sched_data *q = qdisc_priv(cl->qdisc); @@ -794,16 +795,17 @@ cbq_under_limit(struct cbq_class *cl) do { /* It is very suspicious place. Now overlimit - action is generated for not bounded classes - only if link is completely congested. 
- Though it is in agree with ancestor-only paradigm, - it looks very stupid. Particularly, - it means that this chunk of code will either - never be called or result in strong amplification - of burstiness. Dangerous, silly, and, however, - no another solution exists. + * action is generated for not bounded classes + * only if link is completely congested. + * Though it is in agree with ancestor-only paradigm, + * it looks very stupid. Particularly, + * it means that this chunk of code will either + * never be called or result in strong amplification + * of burstiness. Dangerous, silly, and, however, + * no another solution exists. */ - if ((cl = cl->borrow) == NULL) { + cl = cl->borrow; + if (!cl) { this_cl->qstats.overlimits++; this_cl->overlimit(this_cl); return NULL; @@ -816,7 +818,7 @@ cbq_under_limit(struct cbq_class *cl) return cl; } -static __inline__ struct sk_buff * +static inline struct sk_buff * cbq_dequeue_prio(struct Qdisc *sch, int prio) { struct cbq_sched_data *q = qdisc_priv(sch); @@ -840,7 +842,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio) if (cl->deficit <= 0) { /* Class exhausted its allotment per - this round. Switch to the next one. + * this round. Switch to the next one. */ deficit = 1; cl->deficit += cl->quantum; @@ -850,8 +852,8 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio) skb = cl->q->dequeue(cl->q); /* Class did not give us any skb :-( - It could occur even if cl->q->q.qlen != 0 - f.e. if cl->q == "tbf" + * It could occur even if cl->q->q.qlen != 0 + * f.e. if cl->q == "tbf" */ if (skb == NULL) goto skip_class; @@ -880,7 +882,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio) skip_class: if (cl->q->q.qlen == 0 || prio != cl->cpriority) { /* Class is empty or penalized. - Unlink it from active chain. + * Unlink it from active chain. 
*/ cl_prev->next_alive = cl->next_alive; cl->next_alive = NULL; @@ -919,14 +921,14 @@ next_class: return NULL; } -static __inline__ struct sk_buff * +static inline struct sk_buff * cbq_dequeue_1(struct Qdisc *sch) { struct cbq_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; - unsigned activemask; + unsigned int activemask; - activemask = q->activemask&0xFF; + activemask = q->activemask & 0xFF; while (activemask) { int prio = ffz(~activemask); activemask &= ~(1<tx_class) { psched_tdiff_t incr2; /* Time integrator. We calculate EOS time - by adding expected packet transmission time. - If real time is greater, we warp artificial clock, - so that: - - cbq_time = max(real_time, work); + * by adding expected packet transmission time. + * If real time is greater, we warp artificial clock, + * so that: + * + * cbq_time = max(real_time, work); */ incr2 = L2T(&q->link, q->tx_len); q->now += incr2; @@ -977,22 +979,22 @@ cbq_dequeue(struct Qdisc *sch) } /* All the classes are overlimit. - - It is possible, if: - - 1. Scheduler is empty. - 2. Toplevel cutoff inhibited borrowing. - 3. Root class is overlimit. - - Reset 2d and 3d conditions and retry. - - Note, that NS and cbq-2.0 are buggy, peeking - an arbitrary class is appropriate for ancestor-only - sharing, but not for toplevel algorithm. - - Our version is better, but slower, because it requires - two passes, but it is unavoidable with top-level sharing. - */ + * + * It is possible, if: + * + * 1. Scheduler is empty. + * 2. Toplevel cutoff inhibited borrowing. + * 3. Root class is overlimit. + * + * Reset 2d and 3d conditions and retry. + * + * Note, that NS and cbq-2.0 are buggy, peeking + * an arbitrary class is appropriate for ancestor-only + * sharing, but not for toplevel algorithm. + * + * Our version is better, but slower, because it requires + * two passes, but it is unavoidable with top-level sharing. 
+ */ if (q->toplevel == TC_CBQ_MAXLEVEL && q->link.undertime == PSCHED_PASTPERFECT) @@ -1003,7 +1005,8 @@ cbq_dequeue(struct Qdisc *sch) } /* No packets in scheduler or nobody wants to give them to us :-( - Sigh... start watchdog timer in the last case. */ + * Sigh... start watchdog timer in the last case. + */ if (sch->q.qlen) { sch->qstats.overlimits++; @@ -1025,13 +1028,14 @@ static void cbq_adjust_levels(struct cbq_class *this) int level = 0; struct cbq_class *cl; - if ((cl = this->children) != NULL) { + cl = this->children; + if (cl) { do { if (cl->level > level) level = cl->level; } while ((cl = cl->sibling) != this->children); } - this->level = level+1; + this->level = level + 1; } while ((this = this->tparent) != NULL); } @@ -1047,14 +1051,15 @@ static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio) for (h = 0; h < q->clhash.hashsize; h++) { hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { /* BUGGGG... Beware! This expression suffer of - arithmetic overflows! + * arithmetic overflows! 
*/ if (cl->priority == prio) { cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/ q->quanta[prio]; } if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) { - printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum); + pr_warning("CBQ: class %08x has bad quantum==%ld, repaired.\n", + cl->common.classid, cl->quantum); cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1; } } @@ -1065,18 +1070,18 @@ static void cbq_sync_defmap(struct cbq_class *cl) { struct cbq_sched_data *q = qdisc_priv(cl->qdisc); struct cbq_class *split = cl->split; - unsigned h; + unsigned int h; int i; if (split == NULL) return; - for (i=0; i<=TC_PRIO_MAX; i++) { - if (split->defaults[i] == cl && !(cl->defmap&(1<defaults[i] == cl && !(cl->defmap & (1<defaults[i] = NULL; } - for (i=0; i<=TC_PRIO_MAX; i++) { + for (i = 0; i <= TC_PRIO_MAX; i++) { int level = split->level; if (split->defaults[i]) @@ -1089,7 +1094,7 @@ static void cbq_sync_defmap(struct cbq_class *cl) hlist_for_each_entry(c, n, &q->clhash.hash[h], common.hnode) { if (c->split == split && c->level < level && - c->defmap&(1<defmap & (1<defaults[i] = c; level = c->level; } @@ -1103,7 +1108,8 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma struct cbq_class *split = NULL; if (splitid == 0) { - if ((split = cl->split) == NULL) + split = cl->split; + if (!split) return; splitid = split->common.classid; } @@ -1121,9 +1127,9 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma cl->defmap = 0; cbq_sync_defmap(cl); cl->split = split; - cl->defmap = def&mask; + cl->defmap = def & mask; } else - cl->defmap = (cl->defmap&~mask)|(def&mask); + cl->defmap = (cl->defmap & ~mask) | (def & mask); cbq_sync_defmap(cl); } @@ -1136,7 +1142,7 @@ static void cbq_unlink_class(struct cbq_class *this) qdisc_class_hash_remove(&q->clhash, &this->common); if (this->tparent) { - clp=&this->sibling; + clp = &this->sibling; cl = *clp; do { if 
(cl == this) { @@ -1175,7 +1181,7 @@ static void cbq_link_class(struct cbq_class *this) } } -static unsigned int cbq_drop(struct Qdisc* sch) +static unsigned int cbq_drop(struct Qdisc *sch) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl, *cl_head; @@ -1183,7 +1189,8 @@ static unsigned int cbq_drop(struct Qdisc* sch) unsigned int len; for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) { - if ((cl_head = q->active[prio]) == NULL) + cl_head = q->active[prio]; + if (!cl_head) continue; cl = cl_head; @@ -1200,13 +1207,13 @@ static unsigned int cbq_drop(struct Qdisc* sch) } static void -cbq_reset(struct Qdisc* sch) +cbq_reset(struct Qdisc *sch) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl; struct hlist_node *n; int prio; - unsigned h; + unsigned int h; q->activemask = 0; q->pmask = 0; @@ -1238,21 +1245,21 @@ cbq_reset(struct Qdisc* sch) static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss) { - if (lss->change&TCF_CBQ_LSS_FLAGS) { - cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent; - cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent; + if (lss->change & TCF_CBQ_LSS_FLAGS) { + cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent; + cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? 
NULL : cl->tparent; } - if (lss->change&TCF_CBQ_LSS_EWMA) + if (lss->change & TCF_CBQ_LSS_EWMA) cl->ewma_log = lss->ewma_log; - if (lss->change&TCF_CBQ_LSS_AVPKT) + if (lss->change & TCF_CBQ_LSS_AVPKT) cl->avpkt = lss->avpkt; - if (lss->change&TCF_CBQ_LSS_MINIDLE) + if (lss->change & TCF_CBQ_LSS_MINIDLE) cl->minidle = -(long)lss->minidle; - if (lss->change&TCF_CBQ_LSS_MAXIDLE) { + if (lss->change & TCF_CBQ_LSS_MAXIDLE) { cl->maxidle = lss->maxidle; cl->avgidle = lss->maxidle; } - if (lss->change&TCF_CBQ_LSS_OFFTIME) + if (lss->change & TCF_CBQ_LSS_OFFTIME) cl->offtime = lss->offtime; return 0; } @@ -1280,10 +1287,10 @@ static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr) if (wrr->weight) cl->weight = wrr->weight; if (wrr->priority) { - cl->priority = wrr->priority-1; + cl->priority = wrr->priority - 1; cl->cpriority = cl->priority; if (cl->priority >= cl->priority2) - cl->priority2 = TC_CBQ_MAXPRIO-1; + cl->priority2 = TC_CBQ_MAXPRIO - 1; } cbq_addprio(q, cl); @@ -1300,10 +1307,10 @@ static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl) cl->overlimit = cbq_ovl_delay; break; case TC_CBQ_OVL_LOWPRIO: - if (ovl->priority2-1 >= TC_CBQ_MAXPRIO || - ovl->priority2-1 <= cl->priority) + if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO || + ovl->priority2 - 1 <= cl->priority) return -EINVAL; - cl->priority2 = ovl->priority2-1; + cl->priority2 = ovl->priority2 - 1; cl->overlimit = cbq_ovl_lowprio; break; case TC_CBQ_OVL_DROP: @@ -1382,9 +1389,9 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) if (!q->link.q) q->link.q = &noop_qdisc; - q->link.priority = TC_CBQ_MAXPRIO-1; - q->link.priority2 = TC_CBQ_MAXPRIO-1; - q->link.cpriority = TC_CBQ_MAXPRIO-1; + q->link.priority = TC_CBQ_MAXPRIO - 1; + q->link.priority2 = TC_CBQ_MAXPRIO - 1; + q->link.cpriority = TC_CBQ_MAXPRIO - 1; q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC; q->link.overlimit = cbq_ovl_classic; q->link.allot = psched_mtu(qdisc_dev(sch)); @@ -1415,7 +1422,7 @@ put_rtab: 
return err; } -static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl) +static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl) { unsigned char *b = skb_tail_pointer(skb); @@ -1427,7 +1434,7 @@ nla_put_failure: return -1; } -static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl) +static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl) { unsigned char *b = skb_tail_pointer(skb); struct tc_cbq_lssopt opt; @@ -1452,15 +1459,15 @@ nla_put_failure: return -1; } -static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl) +static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl) { unsigned char *b = skb_tail_pointer(skb); struct tc_cbq_wrropt opt; opt.flags = 0; opt.allot = cl->allot; - opt.priority = cl->priority+1; - opt.cpriority = cl->cpriority+1; + opt.priority = cl->priority + 1; + opt.cpriority = cl->cpriority + 1; opt.weight = cl->weight; NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt); return skb->len; @@ -1470,13 +1477,13 @@ nla_put_failure: return -1; } -static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl) +static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl) { unsigned char *b = skb_tail_pointer(skb); struct tc_cbq_ovl opt; opt.strategy = cl->ovl_strategy; - opt.priority2 = cl->priority2+1; + opt.priority2 = cl->priority2 + 1; opt.pad = 0; opt.penalty = cl->penalty; NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt); @@ -1487,7 +1494,7 @@ nla_put_failure: return -1; } -static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl) +static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl) { unsigned char *b = skb_tail_pointer(skb); struct tc_cbq_fopt opt; @@ -1506,7 +1513,7 @@ nla_put_failure: } #ifdef CONFIG_NET_CLS_ACT -static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl) +static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl) { unsigned char *b = 
skb_tail_pointer(skb); struct tc_cbq_police opt; @@ -1570,7 +1577,7 @@ static int cbq_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, struct tcmsg *tcm) { - struct cbq_class *cl = (struct cbq_class*)arg; + struct cbq_class *cl = (struct cbq_class *)arg; struct nlattr *nest; if (cl->tparent) @@ -1598,7 +1605,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) { struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl = (struct cbq_class*)arg; + struct cbq_class *cl = (struct cbq_class *)arg; cl->qstats.qlen = cl->q->q.qlen; cl->xstats.avgidle = cl->avgidle; @@ -1618,7 +1625,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct Qdisc **old) { - struct cbq_class *cl = (struct cbq_class*)arg; + struct cbq_class *cl = (struct cbq_class *)arg; if (new == NULL) { new = qdisc_create_dflt(sch->dev_queue, @@ -1641,10 +1648,9 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, return 0; } -static struct Qdisc * -cbq_leaf(struct Qdisc *sch, unsigned long arg) +static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg) { - struct cbq_class *cl = (struct cbq_class*)arg; + struct cbq_class *cl = (struct cbq_class *)arg; return cl->q; } @@ -1683,13 +1689,12 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) kfree(cl); } -static void -cbq_destroy(struct Qdisc* sch) +static void cbq_destroy(struct Qdisc *sch) { struct cbq_sched_data *q = qdisc_priv(sch); struct hlist_node *n, *next; struct cbq_class *cl; - unsigned h; + unsigned int h; #ifdef CONFIG_NET_CLS_ACT q->rx_class = NULL; @@ -1713,7 +1718,7 @@ cbq_destroy(struct Qdisc* sch) static void cbq_put(struct Qdisc *sch, unsigned long arg) { - struct cbq_class *cl = (struct cbq_class*)arg; + struct cbq_class *cl = (struct cbq_class *)arg; if (--cl->refcnt == 0) { #ifdef CONFIG_NET_CLS_ACT @@ -1736,7 +1741,7 @@ 
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t { int err; struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl = (struct cbq_class*)*arg; + struct cbq_class *cl = (struct cbq_class *)*arg; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_CBQ_MAX + 1]; struct cbq_class *parent; @@ -1828,13 +1833,14 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (classid) { err = -EINVAL; - if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid)) + if (TC_H_MAJ(classid ^ sch->handle) || + cbq_class_lookup(q, classid)) goto failure; } else { int i; - classid = TC_H_MAKE(sch->handle,0x8000); + classid = TC_H_MAKE(sch->handle, 0x8000); - for (i=0; i<0x8000; i++) { + for (i = 0; i < 0x8000; i++) { if (++q->hgenerator >= 0x8000) q->hgenerator = 1; if (cbq_class_lookup(q, classid|q->hgenerator) == NULL) @@ -1891,11 +1897,11 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t cl->minidle = -0x7FFFFFFF; cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT])); cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT])); - if (cl->ewma_log==0) + if (cl->ewma_log == 0) cl->ewma_log = q->link.ewma_log; - if (cl->maxidle==0) + if (cl->maxidle == 0) cl->maxidle = q->link.maxidle; - if (cl->avpkt==0) + if (cl->avpkt == 0) cl->avpkt = q->link.avpkt; cl->overlimit = cbq_ovl_classic; if (tb[TCA_CBQ_OVL_STRATEGY]) @@ -1921,7 +1927,7 @@ failure: static int cbq_delete(struct Qdisc *sch, unsigned long arg) { struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl = (struct cbq_class*)arg; + struct cbq_class *cl = (struct cbq_class *)arg; unsigned int qlen; if (cl->filters || cl->children || cl == &q->link) @@ -1979,7 +1985,7 @@ static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent, u32 classid) { struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *p = (struct cbq_class*)parent; + struct cbq_class *p = (struct cbq_class *)parent; struct 
cbq_class *cl = cbq_class_lookup(q, classid); if (cl) { @@ -1993,7 +1999,7 @@ static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent, static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg) { - struct cbq_class *cl = (struct cbq_class*)arg; + struct cbq_class *cl = (struct cbq_class *)arg; cl->filters--; } @@ -2003,7 +2009,7 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg) struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl; struct hlist_node *n; - unsigned h; + unsigned int h; if (arg->stop) return; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 60f4bdd..4970d56 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -137,10 +137,10 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, mask = nla_get_u8(tb[TCA_DSMARK_MASK]); if (tb[TCA_DSMARK_VALUE]) - p->value[*arg-1] = nla_get_u8(tb[TCA_DSMARK_VALUE]); + p->value[*arg - 1] = nla_get_u8(tb[TCA_DSMARK_VALUE]); if (tb[TCA_DSMARK_MASK]) - p->mask[*arg-1] = mask; + p->mask[*arg - 1] = mask; err = 0; @@ -155,8 +155,8 @@ static int dsmark_delete(struct Qdisc *sch, unsigned long arg) if (!dsmark_valid_index(p, arg)) return -EINVAL; - p->mask[arg-1] = 0xff; - p->value[arg-1] = 0; + p->mask[arg - 1] = 0xff; + p->value[arg - 1] = 0; return 0; } @@ -175,7 +175,7 @@ static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker) if (p->mask[i] == 0xff && !p->value[i]) goto ignore; if (walker->count >= walker->skip) { - if (walker->fn(sch, i+1, walker) < 0) { + if (walker->fn(sch, i + 1, walker) < 0) { walker->stop = 1; break; } @@ -304,9 +304,8 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) * and don't need yet another qdisc as a bypass. 
*/ if (p->mask[index] != 0xff || p->value[index]) - printk(KERN_WARNING - "dsmark_dequeue: unsupported protocol %d\n", - ntohs(skb->protocol)); + pr_warning("dsmark_dequeue: unsupported protocol %d\n", + ntohs(skb->protocol)); break; } @@ -424,14 +423,14 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, if (!dsmark_valid_index(p, cl)) return -EINVAL; - tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl-1); + tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl - 1); tcm->tcm_info = p->q->handle; opts = nla_nest_start(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; - NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl-1]); - NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl-1]); + NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl - 1]); + NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl - 1]); return nla_nest_end(skb, opts); diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index aa4d633..b3075f8 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -19,12 +19,11 @@ /* 1 band FIFO pseudo-"scheduler" */ -struct fifo_sched_data -{ +struct fifo_sched_data { u32 limit; }; -static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) +static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct fifo_sched_data *q = qdisc_priv(sch); @@ -34,7 +33,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) return qdisc_reshape_fail(skb, sch); } -static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) +static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct fifo_sched_data *q = qdisc_priv(sch); @@ -44,7 +43,7 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) return qdisc_reshape_fail(skb, sch); } -static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch) +static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct sk_buff *skb_head; struct fifo_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 
723b278..2f1cb62 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -87,8 +87,8 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, */ kfree_skb(skb); if (net_ratelimit()) - printk(KERN_WARNING "Dead loop on netdevice %s, " - "fix it urgently!\n", dev_queue->dev->name); + pr_warning("Dead loop on netdevice %s, fix it urgently!\n", + dev_queue->dev->name); ret = qdisc_qlen(q); } else { /* @@ -137,8 +137,8 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, } else { /* Driver returned NETDEV_TX_BUSY - requeue skb */ if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit())) - printk(KERN_WARNING "BUG %s code %d qlen %d\n", - dev->name, ret, q->q.qlen); + pr_warning("BUG %s code %d qlen %d\n", + dev->name, ret, q->q.qlen); ret = dev_requeue_skb(skb, q); } @@ -412,8 +412,9 @@ static struct Qdisc noqueue_qdisc = { }; -static const u8 prio2band[TC_PRIO_MAX+1] = - { 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 }; +static const u8 prio2band[TC_PRIO_MAX + 1] = { + 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 +}; /* 3-band FIFO queue: old style, but should be a bit faster than generic prio+fifo combination. 
@@ -445,7 +446,7 @@ static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv, return priv->q + band; } -static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) +static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc) { if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) { int band = prio2band[skb->priority & TC_PRIO_MAX]; @@ -460,7 +461,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) return qdisc_drop(skb, qdisc); } -static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc) +static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) { struct pfifo_fast_priv *priv = qdisc_priv(qdisc); int band = bitmap2band[priv->bitmap]; @@ -479,7 +480,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc) return NULL; } -static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc) +static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc) { struct pfifo_fast_priv *priv = qdisc_priv(qdisc); int band = bitmap2band[priv->bitmap]; @@ -493,7 +494,7 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc) return NULL; } -static void pfifo_fast_reset(struct Qdisc* qdisc) +static void pfifo_fast_reset(struct Qdisc *qdisc) { int prio; struct pfifo_fast_priv *priv = qdisc_priv(qdisc); @@ -510,7 +511,7 @@ static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb) { struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS }; - memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1); + memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1); NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); return skb->len; @@ -681,20 +682,18 @@ static void attach_one_default_qdisc(struct net_device *dev, struct netdev_queue *dev_queue, void *_unused) { - struct Qdisc *qdisc; + struct Qdisc *qdisc = &noqueue_qdisc; if (dev->tx_queue_len) { qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops, TC_H_ROOT); if (!qdisc) { - printk(KERN_INFO "%s: activation failed\n", dev->name); + netdev_info(dev, "activation 
failed\n"); return; } /* Can by-pass the queue discipline for default qdisc */ qdisc->flags |= TCQ_F_CAN_BYPASS; - } else { - qdisc = &noqueue_qdisc; } dev_queue->qdisc_sleeping = qdisc; } diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 51dcc2a..b9493a0 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -32,8 +32,7 @@ struct gred_sched_data; struct gred_sched; -struct gred_sched_data -{ +struct gred_sched_data { u32 limit; /* HARD maximal queue length */ u32 DP; /* the drop pramaters */ u32 bytesin; /* bytes seen on virtualQ so far*/ @@ -50,8 +49,7 @@ enum { GRED_RIO_MODE, }; -struct gred_sched -{ +struct gred_sched { struct gred_sched_data *tab[MAX_DPs]; unsigned long flags; u32 red_flags; @@ -150,17 +148,18 @@ static inline int gred_use_harddrop(struct gred_sched *t) return t->red_flags & TC_RED_HARDDROP; } -static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) +static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch) { - struct gred_sched_data *q=NULL; - struct gred_sched *t= qdisc_priv(sch); + struct gred_sched_data *q = NULL; + struct gred_sched *t = qdisc_priv(sch); unsigned long qavg = 0; u16 dp = tc_index_to_dp(skb); - if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { + if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { dp = t->def; - if ((q = t->tab[dp]) == NULL) { + q = t->tab[dp]; + if (!q) { /* Pass through packets not assigned to a DP * if no default DP has been configured. This * allows for DP flows to be left untouched. 
@@ -183,7 +182,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) for (i = 0; i < t->DPs; i++) { if (t->tab[i] && t->tab[i]->prio < q->prio && !red_is_idling(&t->tab[i]->parms)) - qavg +=t->tab[i]->parms.qavg; + qavg += t->tab[i]->parms.qavg; } } @@ -203,28 +202,28 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) gred_store_wred_set(t, q); switch (red_action(&q->parms, q->parms.qavg + qavg)) { - case RED_DONT_MARK: - break; - - case RED_PROB_MARK: - sch->qstats.overlimits++; - if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) { - q->stats.prob_drop++; - goto congestion_drop; - } - - q->stats.prob_mark++; - break; - - case RED_HARD_MARK: - sch->qstats.overlimits++; - if (gred_use_harddrop(t) || !gred_use_ecn(t) || - !INET_ECN_set_ce(skb)) { - q->stats.forced_drop++; - goto congestion_drop; - } - q->stats.forced_mark++; - break; + case RED_DONT_MARK: + break; + + case RED_PROB_MARK: + sch->qstats.overlimits++; + if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) { + q->stats.prob_drop++; + goto congestion_drop; + } + + q->stats.prob_mark++; + break; + + case RED_HARD_MARK: + sch->qstats.overlimits++; + if (gred_use_harddrop(t) || !gred_use_ecn(t) || + !INET_ECN_set_ce(skb)) { + q->stats.forced_drop++; + goto congestion_drop; + } + q->stats.forced_mark++; + break; } if (q->backlog + qdisc_pkt_len(skb) <= q->limit) { @@ -241,7 +240,7 @@ congestion_drop: return NET_XMIT_CN; } -static struct sk_buff *gred_dequeue(struct Qdisc* sch) +static struct sk_buff *gred_dequeue(struct Qdisc *sch) { struct sk_buff *skb; struct gred_sched *t = qdisc_priv(sch); @@ -254,9 +253,9 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch) if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { if (net_ratelimit()) - printk(KERN_WARNING "GRED: Unable to relocate " - "VQ 0x%x after dequeue, screwing up " - "backlog.\n", tc_index_to_dp(skb)); + pr_warning("GRED: Unable to relocate VQ 0x%x " + "after dequeue, screwing up " + "backlog.\n", tc_index_to_dp(skb)); } else { 
q->backlog -= qdisc_pkt_len(skb); @@ -273,7 +272,7 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch) return NULL; } -static unsigned int gred_drop(struct Qdisc* sch) +static unsigned int gred_drop(struct Qdisc *sch) { struct sk_buff *skb; struct gred_sched *t = qdisc_priv(sch); @@ -286,9 +285,9 @@ static unsigned int gred_drop(struct Qdisc* sch) if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { if (net_ratelimit()) - printk(KERN_WARNING "GRED: Unable to relocate " - "VQ 0x%x while dropping, screwing up " - "backlog.\n", tc_index_to_dp(skb)); + pr_warning("GRED: Unable to relocate VQ 0x%x " + "while dropping, screwing up " + "backlog.\n", tc_index_to_dp(skb)); } else { q->backlog -= len; q->stats.other++; @@ -308,7 +307,7 @@ static unsigned int gred_drop(struct Qdisc* sch) } -static void gred_reset(struct Qdisc* sch) +static void gred_reset(struct Qdisc *sch) { int i; struct gred_sched *t = qdisc_priv(sch); @@ -369,8 +368,8 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps) for (i = table->DPs; i < MAX_DPs; i++) { if (table->tab[i]) { - printk(KERN_WARNING "GRED: Warning: Destroying " - "shadowed VQ 0x%x\n", i); + pr_warning("GRED: Warning: Destroying " + "shadowed VQ 0x%x\n", i); gred_destroy_vq(table->tab[i]); table->tab[i] = NULL; } diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 2e45791..dea4009 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -81,8 +81,7 @@ * that are expensive on 32-bit architectures. 
*/ -struct internal_sc -{ +struct internal_sc { u64 sm1; /* scaled slope of the 1st segment */ u64 ism1; /* scaled inverse-slope of the 1st segment */ u64 dx; /* the x-projection of the 1st segment */ @@ -92,8 +91,7 @@ struct internal_sc }; /* runtime service curve */ -struct runtime_sc -{ +struct runtime_sc { u64 x; /* current starting position on x-axis */ u64 y; /* current starting position on y-axis */ u64 sm1; /* scaled slope of the 1st segment */ @@ -104,15 +102,13 @@ struct runtime_sc u64 ism2; /* scaled inverse-slope of the 2nd segment */ }; -enum hfsc_class_flags -{ +enum hfsc_class_flags { HFSC_RSC = 0x1, HFSC_FSC = 0x2, HFSC_USC = 0x4 }; -struct hfsc_class -{ +struct hfsc_class { struct Qdisc_class_common cl_common; unsigned int refcnt; /* usage count */ @@ -140,8 +136,8 @@ struct hfsc_class u64 cl_cumul; /* cumulative work in bytes done by real-time criteria */ - u64 cl_d; /* deadline*/ - u64 cl_e; /* eligible time */ + u64 cl_d; /* deadline*/ + u64 cl_e; /* eligible time */ u64 cl_vt; /* virtual time */ u64 cl_f; /* time when this class will fit for link-sharing, max(myf, cfmin) */ @@ -176,8 +172,7 @@ struct hfsc_class unsigned long cl_nactive; /* number of active children */ }; -struct hfsc_sched -{ +struct hfsc_sched { u16 defcls; /* default class id */ struct hfsc_class root; /* root class */ struct Qdisc_class_hash clhash; /* class hash */ @@ -693,7 +688,7 @@ init_vf(struct hfsc_class *cl, unsigned int len) if (go_active) { n = rb_last(&cl->cl_parent->vt_tree); if (n != NULL) { - max_cl = rb_entry(n, struct hfsc_class,vt_node); + max_cl = rb_entry(n, struct hfsc_class, vt_node); /* * set vt to the average of the min and max * classes. 
if the parent's period didn't @@ -1177,8 +1172,10 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) return NULL; } #endif - if ((cl = (struct hfsc_class *)res.class) == NULL) { - if ((cl = hfsc_find_class(res.classid, sch)) == NULL) + cl = (struct hfsc_class *)res.class; + if (!cl) { + cl = hfsc_find_class(res.classid, sch); + if (!cl) break; /* filter selected invalid classid */ if (cl->level >= head->level) break; /* filter may only point downwards */ @@ -1316,7 +1313,7 @@ hfsc_dump_sc(struct sk_buff *skb, int attr, struct internal_sc *sc) return -1; } -static inline int +static int hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl) { if ((cl->cl_flags & HFSC_RSC) && @@ -1420,7 +1417,8 @@ hfsc_schedule_watchdog(struct Qdisc *sch) struct hfsc_class *cl; u64 next_time = 0; - if ((cl = eltree_get_minel(q)) != NULL) + cl = eltree_get_minel(q); + if (cl) next_time = cl->cl_e; if (q->root.cl_cfmin != 0) { if (next_time == 0 || next_time > q->root.cl_cfmin) @@ -1626,7 +1624,8 @@ hfsc_dequeue(struct Qdisc *sch) * find the class with the minimum deadline among * the eligible classes. */ - if ((cl = eltree_get_mindl(q, cur_time)) != NULL) { + cl = eltree_get_mindl(q, cur_time); + if (cl) { realtime = 1; } else { /* diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 984c1b0..3e86fd3 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -99,9 +99,10 @@ struct htb_class { struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */ struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */ /* When class changes from state 1->2 and disconnects from - parent's feed then we lost ptr value and start from the - first child again. Here we store classid of the - last valid ptr (used when ptr is NULL). */ + * parent's feed then we lost ptr value and start from the + * first child again. Here we store classid of the + * last valid ptr (used when ptr is NULL). 
+ */ u32 last_ptr_id[TC_HTB_NUMPRIO]; } inner; } un; @@ -185,7 +186,7 @@ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch) * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessfull * then finish and return direct queue. */ -#define HTB_DIRECT (struct htb_class*)-1 +#define HTB_DIRECT ((struct htb_class *)-1L) static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) @@ -197,11 +198,13 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, int result; /* allow to select class by setting skb->priority to valid classid; - note that nfmark can be used too by attaching filter fw with no - rules in it */ + * note that nfmark can be used too by attaching filter fw with no + * rules in it + */ if (skb->priority == sch->handle) return HTB_DIRECT; /* X:0 (direct flow) selected */ - if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0) + cl = htb_find(skb->priority, sch); + if (cl && cl->level == 0) return cl; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; @@ -216,10 +219,12 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, return NULL; } #endif - if ((cl = (void *)res.class) == NULL) { + cl = (void *)res.class; + if (!cl) { if (res.classid == sch->handle) return HTB_DIRECT; /* X:0 (direct flow) */ - if ((cl = htb_find(res.classid, sch)) == NULL) + cl = htb_find(res.classid, sch); + if (!cl) break; /* filter selected invalid classid */ } if (!cl->level) @@ -378,7 +383,8 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl) if (p->un.inner.feed[prio].rb_node) /* parent already has its feed in use so that - reset bit in mask as parent is already ok */ + * reset bit in mask as parent is already ok + */ mask &= ~(1 << prio); htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio); @@ -413,8 +419,9 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) if (p->un.inner.ptr[prio] == cl->node + prio) 
{ /* we are removing child which is pointed to from - parent feed - forget the pointer but remember - classid */ + * parent feed - forget the pointer but remember + * classid + */ p->un.inner.last_ptr_id[prio] = cl->common.classid; p->un.inner.ptr[prio] = NULL; } @@ -664,8 +671,9 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level, unsigned long start) { /* don't run for longer than 2 jiffies; 2 is used instead of - 1 to simplify things when jiffy is going to be incremented - too soon */ + * 1 to simplify things when jiffy is going to be incremented + * too soon + */ unsigned long stop_at = start + 2; while (time_before(jiffies, stop_at)) { struct htb_class *cl; @@ -688,7 +696,7 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level, /* too much load - let's continue after a break for scheduling */ if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) { - printk(KERN_WARNING "htb: too many events!\n"); + pr_warning("htb: too many events!\n"); q->warned |= HTB_WARN_TOOMANYEVENTS; } @@ -696,7 +704,8 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level, } /* Returns class->node+prio from id-tree where classe's id is >= id. NULL - is no such one exists. */ + * is no such one exists. 
+ */ static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n, u32 id) { @@ -740,12 +749,14 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, for (i = 0; i < 65535; i++) { if (!*sp->pptr && *sp->pid) { /* ptr was invalidated but id is valid - try to recover - the original or next ptr */ + * the original or next ptr + */ *sp->pptr = htb_id_find_next_upper(prio, sp->root, *sp->pid); } *sp->pid = 0; /* ptr is valid now so that remove this hint as it - can become out of date quickly */ + * can become out of date quickly + */ if (!*sp->pptr) { /* we are at right end; rewind & go up */ *sp->pptr = sp->root; while ((*sp->pptr)->rb_left) @@ -773,7 +784,8 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, } /* dequeues packet at given priority and level; call only if - you are sure that there is active class at prio/level */ + * you are sure that there is active class at prio/level + */ static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio, int level) { @@ -790,9 +802,10 @@ next: return NULL; /* class can be empty - it is unlikely but can be true if leaf - qdisc drops packets in enqueue routine or if someone used - graft operation on the leaf since last dequeue; - simply deactivate and skip such class */ + * qdisc drops packets in enqueue routine or if someone used + * graft operation on the leaf since last dequeue; + * simply deactivate and skip such class + */ if (unlikely(cl->un.leaf.q->q.qlen == 0)) { struct htb_class *next; htb_deactivate(q, cl); @@ -832,7 +845,8 @@ next: ptr[0]) + prio); } /* this used to be after charge_class but this constelation - gives us slightly better performance */ + * gives us slightly better performance + */ if (!cl->un.leaf.q->q.qlen) htb_deactivate(q, cl); htb_charge_class(q, cl, level, skb); @@ -882,6 +896,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) m = ~q->row_mask[level]; while (m != (int)(-1)) { int prio = ffz(m); + m |= 1 << prio; skb = 
htb_dequeue_tree(q, prio, level); if (likely(skb != NULL)) { @@ -989,13 +1004,12 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) return err; if (tb[TCA_HTB_INIT] == NULL) { - printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n"); + pr_err("HTB: hey probably you have bad tc tool ?\n"); return -EINVAL; } gopt = nla_data(tb[TCA_HTB_INIT]); if (gopt->version != HTB_VER >> 16) { - printk(KERN_ERR - "HTB: need tc/htb version %d (minor is %d), you have %d\n", + pr_err("HTB: need tc/htb version %d (minor is %d), you have %d\n", HTB_VER >> 16, HTB_VER & 0xffff, gopt->version); return -EINVAL; } @@ -1208,9 +1222,10 @@ static void htb_destroy(struct Qdisc *sch) cancel_work_sync(&q->work); qdisc_watchdog_cancel(&q->watchdog); /* This line used to be after htb_destroy_class call below - and surprisingly it worked in 2.4. But it must precede it - because filter need its target class alive to be able to call - unbind_filter on it (without Oops). */ + * and surprisingly it worked in 2.4. But it must precede it + * because filter need its target class alive to be able to call + * unbind_filter on it (without Oops). 
+ */ tcf_destroy_chain(&q->filter_list); for (i = 0; i < q->clhash.hashsize; i++) { @@ -1344,11 +1359,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* check maximal depth */ if (parent && parent->parent && parent->parent->level < 2) { - printk(KERN_ERR "htb: tree is too deep\n"); + pr_err("htb: tree is too deep\n"); goto failure; } err = -ENOBUFS; - if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL) + cl = kzalloc(sizeof(*cl), GFP_KERNEL); + if (!cl) goto failure; err = gen_new_estimator(&cl->bstats, &cl->rate_est, @@ -1368,8 +1384,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, RB_CLEAR_NODE(&cl->node[prio]); /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) - so that can't be used inside of sch_tree_lock - -- thanks to Karlis Peisenieks */ + * so that can't be used inside of sch_tree_lock + * -- thanks to Karlis Peisenieks + */ new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid); sch_tree_lock(sch); @@ -1421,17 +1438,18 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, } /* it used to be a nasty bug here, we have to check that node - is really leaf before changing cl->un.leaf ! */ + * is really leaf before changing cl->un.leaf ! + */ if (!cl->level) { cl->quantum = rtab->rate.rate / q->rate2quantum; if (!hopt->quantum && cl->quantum < 1000) { - printk(KERN_WARNING + pr_warning( "HTB: quantum of class %X is small. Consider r2q change.\n", cl->common.classid); cl->quantum = 1000; } if (!hopt->quantum && cl->quantum > 200000) { - printk(KERN_WARNING + pr_warning( "HTB: quantum of class %X is big. Consider r2q change.\n", cl->common.classid); cl->quantum = 200000; @@ -1480,13 +1498,13 @@ static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent, struct htb_class *cl = htb_find(classid, sch); /*if (cl && !cl->level) return 0; - The line above used to be there to prevent attaching filters to - leaves. 
But at least tc_index filter uses this just to get class - for other reasons so that we have to allow for it. - ---- - 19.6.2002 As Werner explained it is ok - bind filter is just - another way to "lock" the class - unlike "get" this lock can - be broken by class during destroy IIUC. + * The line above used to be there to prevent attaching filters to + * leaves. But at least tc_index filter uses this just to get class + * for other reasons so that we have to allow for it. + * ---- + * 19.6.2002 As Werner explained it is ok - bind filter is just + * another way to "lock" the class - unlike "get" this lock can + * be broken by class during destroy IIUC. */ if (cl) cl->filter_cnt++; diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 21f13da..820f2a7 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -156,7 +156,7 @@ static unsigned int multiq_drop(struct Qdisc *sch) unsigned int len; struct Qdisc *qdisc; - for (band = q->bands-1; band >= 0; band--) { + for (band = q->bands - 1; band >= 0; band--) { qdisc = q->queues[band]; if (qdisc->ops->drop) { len = qdisc->ops->drop(qdisc); @@ -265,7 +265,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt) for (i = 0; i < q->max_bands; i++) q->queues[i] = &noop_qdisc; - err = multiq_tune(sch,opt); + err = multiq_tune(sch, opt); if (err) kfree(q->queues); @@ -346,7 +346,7 @@ static int multiq_dump_class(struct Qdisc *sch, unsigned long cl, struct multiq_sched_data *q = qdisc_priv(sch); tcm->tcm_handle |= TC_H_MIN(cl); - tcm->tcm_info = q->queues[cl-1]->handle; + tcm->tcm_info = q->queues[cl - 1]->handle; return 0; } @@ -378,7 +378,7 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg) arg->count++; continue; } - if (arg->fn(sch, band+1, arg) < 0) { + if (arg->fn(sch, band + 1, arg) < 0) { arg->stop = 1; break; } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 1c4bce8..c2bbbe6 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -211,8 
+211,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) } cb = netem_skb_cb(skb); - if (q->gap == 0 || /* not doing reordering */ - q->counter < q->gap || /* inside last reordering gap */ + if (q->gap == 0 || /* not doing reordering */ + q->counter < q->gap || /* inside last reordering gap */ q->reorder < get_crandom(&q->reorder_cor)) { psched_time_t now; psched_tdiff_t delay; @@ -249,7 +249,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } -static unsigned int netem_drop(struct Qdisc* sch) +static unsigned int netem_drop(struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); unsigned int len = 0; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 966158d..3bea31e 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -22,8 +22,7 @@ #include -struct prio_sched_data -{ +struct prio_sched_data { int bands; struct tcf_proto *filter_list; u8 prio2band[TC_PRIO_MAX+1]; @@ -54,7 +53,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) if (!q->filter_list || err < 0) { if (TC_H_MAJ(band)) band = 0; - return q->queues[q->prio2band[band&TC_PRIO_MAX]]; + return q->queues[q->prio2band[band & TC_PRIO_MAX]]; } band = res.classid; } @@ -107,7 +106,7 @@ static struct sk_buff *prio_peek(struct Qdisc *sch) return NULL; } -static struct sk_buff *prio_dequeue(struct Qdisc* sch) +static struct sk_buff *prio_dequeue(struct Qdisc *sch) { struct prio_sched_data *q = qdisc_priv(sch); int prio; @@ -124,7 +123,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc* sch) } -static unsigned int prio_drop(struct Qdisc* sch) +static unsigned int prio_drop(struct Qdisc *sch) { struct prio_sched_data *q = qdisc_priv(sch); int prio; @@ -143,24 +142,24 @@ static unsigned int prio_drop(struct Qdisc* sch) static void -prio_reset(struct Qdisc* sch) +prio_reset(struct Qdisc *sch) { int prio; struct prio_sched_data *q = qdisc_priv(sch); - for (prio=0; priobands; prio++) + for (prio = 0; prio < q->bands; 
prio++) qdisc_reset(q->queues[prio]); sch->q.qlen = 0; } static void -prio_destroy(struct Qdisc* sch) +prio_destroy(struct Qdisc *sch) { int prio; struct prio_sched_data *q = qdisc_priv(sch); tcf_destroy_chain(&q->filter_list); - for (prio=0; priobands; prio++) + for (prio = 0; prio < q->bands; prio++) qdisc_destroy(q->queues[prio]); } @@ -177,7 +176,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2) return -EINVAL; - for (i=0; i<=TC_PRIO_MAX; i++) { + for (i = 0; i <= TC_PRIO_MAX; i++) { if (qopt->priomap[i] >= qopt->bands) return -EINVAL; } @@ -186,7 +185,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) q->bands = qopt->bands; memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); - for (i=q->bands; ibands; i < TCQ_PRIO_BANDS; i++) { struct Qdisc *child = q->queues[i]; q->queues[i] = &noop_qdisc; if (child != &noop_qdisc) { @@ -196,9 +195,10 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) } sch_tree_unlock(sch); - for (i=0; ibands; i++) { + for (i = 0; i < q->bands; i++) { if (q->queues[i] == &noop_qdisc) { struct Qdisc *child, *old; + child = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, TC_H_MAKE(sch->handle, i + 1)); @@ -224,7 +224,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt) struct prio_sched_data *q = qdisc_priv(sch); int i; - for (i=0; iqueues[i] = &noop_qdisc; if (opt == NULL) { @@ -232,7 +232,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt) } else { int err; - if ((err= prio_tune(sch, opt)) != 0) + if ((err = prio_tune(sch, opt)) != 0) return err; } return 0; @@ -245,7 +245,7 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) struct tc_prio_qopt opt; opt.bands = q->bands; - memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1); + memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1); NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); @@ -342,7 +342,7 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg) 
arg->count++; continue; } - if (arg->fn(sch, prio+1, arg) < 0) { + if (arg->fn(sch, prio + 1, arg) < 0) { arg->stop = 1; break; } @@ -350,7 +350,7 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg) } } -static struct tcf_proto ** prio_find_tcf(struct Qdisc *sch, unsigned long cl) +static struct tcf_proto **prio_find_tcf(struct Qdisc *sch, unsigned long cl) { struct prio_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index a6009c5..6891575 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -36,8 +36,7 @@ if RED works correctly. */ -struct red_sched_data -{ +struct red_sched_data { u32 limit; /* HARD maximal queue length */ unsigned char flags; struct red_parms parms; @@ -55,7 +54,7 @@ static inline int red_use_harddrop(struct red_sched_data *q) return q->flags & TC_RED_HARDDROP; } -static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch) +static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct red_sched_data *q = qdisc_priv(sch); struct Qdisc *child = q->qdisc; @@ -67,29 +66,29 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch) red_end_of_idle_period(&q->parms); switch (red_action(&q->parms, q->parms.qavg)) { - case RED_DONT_MARK: - break; - - case RED_PROB_MARK: - sch->qstats.overlimits++; - if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) { - q->stats.prob_drop++; - goto congestion_drop; - } - - q->stats.prob_mark++; - break; - - case RED_HARD_MARK: - sch->qstats.overlimits++; - if (red_use_harddrop(q) || !red_use_ecn(q) || - !INET_ECN_set_ce(skb)) { - q->stats.forced_drop++; - goto congestion_drop; - } - - q->stats.forced_mark++; - break; + case RED_DONT_MARK: + break; + + case RED_PROB_MARK: + sch->qstats.overlimits++; + if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) { + q->stats.prob_drop++; + goto congestion_drop; + } + + q->stats.prob_mark++; + break; + + case RED_HARD_MARK: + sch->qstats.overlimits++; + if (red_use_harddrop(q) || !red_use_ecn(q) || + 
!INET_ECN_set_ce(skb)) { + q->stats.forced_drop++; + goto congestion_drop; + } + + q->stats.forced_mark++; + break; } ret = qdisc_enqueue(skb, child); @@ -107,7 +106,7 @@ congestion_drop: return NET_XMIT_CN; } -static struct sk_buff * red_dequeue(struct Qdisc* sch) +static struct sk_buff *red_dequeue(struct Qdisc *sch) { struct sk_buff *skb; struct red_sched_data *q = qdisc_priv(sch); @@ -122,7 +121,7 @@ static struct sk_buff * red_dequeue(struct Qdisc* sch) return skb; } -static struct sk_buff * red_peek(struct Qdisc* sch) +static struct sk_buff *red_peek(struct Qdisc *sch) { struct red_sched_data *q = qdisc_priv(sch); struct Qdisc *child = q->qdisc; @@ -130,7 +129,7 @@ static struct sk_buff * red_peek(struct Qdisc* sch) return child->ops->peek(child); } -static unsigned int red_drop(struct Qdisc* sch) +static unsigned int red_drop(struct Qdisc *sch) { struct red_sched_data *q = qdisc_priv(sch); struct Qdisc *child = q->qdisc; @@ -149,7 +148,7 @@ static unsigned int red_drop(struct Qdisc* sch) return 0; } -static void red_reset(struct Qdisc* sch) +static void red_reset(struct Qdisc *sch) { struct red_sched_data *q = qdisc_priv(sch); @@ -216,7 +215,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) return 0; } -static int red_init(struct Qdisc* sch, struct nlattr *opt) +static int red_init(struct Qdisc *sch, struct nlattr *opt) { struct red_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 239ec53..54a36f4 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -92,8 +92,7 @@ typedef unsigned char sfq_index; * while following values [SFQ_SLOTS ... 
SFQ_SLOTS + SFQ_DEPTH - 1] * are 'pointers' to dep[] array */ -struct sfq_head -{ +struct sfq_head { sfq_index next; sfq_index prev; }; @@ -108,11 +107,10 @@ struct sfq_slot { short allot; /* credit for this slot */ }; -struct sfq_sched_data -{ +struct sfq_sched_data { /* Parameters */ int perturb_period; - unsigned quantum; /* Allotment per round: MUST BE >= MTU */ + unsigned int quantum; /* Allotment per round: MUST BE >= MTU */ int limit; /* Variables */ @@ -137,12 +135,12 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index return &q->dep[val - SFQ_SLOTS]; } -static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1) +static unsigned int sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1) { return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1); } -static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) +static unsigned int sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) { u32 h, h2; @@ -157,13 +155,13 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) iph = ip_hdr(skb); h = (__force u32)iph->daddr; h2 = (__force u32)iph->saddr ^ iph->protocol; - if (iph->frag_off & htons(IP_MF|IP_OFFSET)) + if (iph->frag_off & htons(IP_MF | IP_OFFSET)) break; poff = proto_ports_offset(iph->protocol); if (poff >= 0 && pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) { iph = ip_hdr(skb); - h2 ^= *(u32*)((void *)iph + iph->ihl * 4 + poff); + h2 ^= *(u32 *)((void *)iph + iph->ihl * 4 + poff); } break; } @@ -181,7 +179,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) if (poff >= 0 && pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) { iph = ipv6_hdr(skb); - h2 ^= *(u32*)((void *)iph + sizeof(*iph) + poff); + h2 ^= *(u32 *)((void *)iph + sizeof(*iph) + poff); } break; } diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 77565e7..475edfb 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -97,8 +97,7 @@ changed 
the limit is not effective anymore. */ -struct tbf_sched_data -{ +struct tbf_sched_data { /* Parameters */ u32 limit; /* Maximal length of backlog: bytes */ u32 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */ @@ -115,10 +114,10 @@ struct tbf_sched_data struct qdisc_watchdog watchdog; /* Watchdog timer */ }; -#define L2T(q,L) qdisc_l2t((q)->R_tab,L) -#define L2T_P(q,L) qdisc_l2t((q)->P_tab,L) +#define L2T(q, L) qdisc_l2t((q)->R_tab, L) +#define L2T_P(q, L) qdisc_l2t((q)->P_tab, L) -static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) +static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); int ret; @@ -138,7 +137,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) return NET_XMIT_SUCCESS; } -static unsigned int tbf_drop(struct Qdisc* sch) +static unsigned int tbf_drop(struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); unsigned int len = 0; @@ -150,7 +149,7 @@ static unsigned int tbf_drop(struct Qdisc* sch) return len; } -static struct sk_buff *tbf_dequeue(struct Qdisc* sch) +static struct sk_buff *tbf_dequeue(struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; @@ -209,7 +208,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch) return NULL; } -static void tbf_reset(struct Qdisc* sch) +static void tbf_reset(struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); @@ -227,7 +226,7 @@ static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = { [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, }; -static int tbf_change(struct Qdisc* sch, struct nlattr *opt) +static int tbf_change(struct Qdisc *sch, struct nlattr *opt) { int err; struct tbf_sched_data *q = qdisc_priv(sch); @@ -236,7 +235,7 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt) struct qdisc_rate_table *rtab = NULL; struct qdisc_rate_table *ptab = NULL; struct Qdisc *child = NULL; - int max_size,n; + int max_size, n; err = 
nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy); if (err < 0) @@ -259,15 +258,18 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt) } for (n = 0; n < 256; n++) - if (rtab->data[n] > qopt->buffer) break; - max_size = (n << qopt->rate.cell_log)-1; + if (rtab->data[n] > qopt->buffer) + break; + max_size = (n << qopt->rate.cell_log) - 1; if (ptab) { int size; for (n = 0; n < 256; n++) - if (ptab->data[n] > qopt->mtu) break; - size = (n << qopt->peakrate.cell_log)-1; - if (size < max_size) max_size = size; + if (ptab->data[n] > qopt->mtu) + break; + size = (n << qopt->peakrate.cell_log) - 1; + if (size < max_size) + max_size = size; } if (max_size < 0) goto done; @@ -310,7 +312,7 @@ done: return err; } -static int tbf_init(struct Qdisc* sch, struct nlattr *opt) +static int tbf_init(struct Qdisc *sch, struct nlattr *opt) { struct tbf_sched_data *q = qdisc_priv(sch); @@ -422,8 +424,7 @@ static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker) } } -static const struct Qdisc_class_ops tbf_class_ops = -{ +static const struct Qdisc_class_ops tbf_class_ops = { .graft = tbf_graft, .leaf = tbf_leaf, .get = tbf_get, diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 84ce48e..64c071d 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -53,8 +53,7 @@ which will not break load balancing, though native slave traffic will have the highest priority. 
*/ -struct teql_master -{ +struct teql_master { struct Qdisc_ops qops; struct net_device *dev; struct Qdisc *slaves; @@ -65,22 +64,21 @@ struct teql_master unsigned long tx_dropped; }; -struct teql_sched_data -{ +struct teql_sched_data { struct Qdisc *next; struct teql_master *m; struct neighbour *ncache; struct sk_buff_head q; }; -#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next) +#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next) -#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT) +#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT) /* "teql*" qdisc routines */ static int -teql_enqueue(struct sk_buff *skb, struct Qdisc* sch) +teql_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct net_device *dev = qdisc_dev(sch); struct teql_sched_data *q = qdisc_priv(sch); @@ -97,7 +95,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch) } static struct sk_buff * -teql_dequeue(struct Qdisc* sch) +teql_dequeue(struct Qdisc *sch) { struct teql_sched_data *dat = qdisc_priv(sch); struct netdev_queue *dat_queue; @@ -117,13 +115,13 @@ teql_dequeue(struct Qdisc* sch) } static struct sk_buff * -teql_peek(struct Qdisc* sch) +teql_peek(struct Qdisc *sch) { /* teql is meant to be used as root qdisc */ return NULL; } -static __inline__ void +static inline void teql_neigh_release(struct neighbour *n) { if (n) @@ -131,7 +129,7 @@ teql_neigh_release(struct neighbour *n) } static void -teql_reset(struct Qdisc* sch) +teql_reset(struct Qdisc *sch) { struct teql_sched_data *dat = qdisc_priv(sch); @@ -141,13 +139,14 @@ teql_reset(struct Qdisc* sch) } static void -teql_destroy(struct Qdisc* sch) +teql_destroy(struct Qdisc *sch) { struct Qdisc *q, *prev; struct teql_sched_data *dat = qdisc_priv(sch); struct teql_master *master = dat->m; - if ((prev = master->slaves) != NULL) { + prev = master->slaves; + if (prev) { do { q = NEXT_SLAVE(prev); if (q == sch) { @@ -179,7 +178,7 @@ teql_destroy(struct Qdisc* sch) static int teql_qdisc_init(struct Qdisc *sch, 
struct nlattr *opt) { struct net_device *dev = qdisc_dev(sch); - struct teql_master *m = (struct teql_master*)sch->ops; + struct teql_master *m = (struct teql_master *)sch->ops; struct teql_sched_data *q = qdisc_priv(sch); if (dev->hard_header_len > m->dev->hard_header_len) @@ -290,7 +289,8 @@ restart: nores = 0; busy = 0; - if ((q = start) == NULL) + q = start; + if (!q) goto drop; do { @@ -355,10 +355,10 @@ drop: static int teql_master_open(struct net_device *dev) { - struct Qdisc * q; + struct Qdisc *q; struct teql_master *m = netdev_priv(dev); int mtu = 0xFFFE; - unsigned flags = IFF_NOARP|IFF_MULTICAST; + unsigned int flags = IFF_NOARP | IFF_MULTICAST; if (m->slaves == NULL) return -EUNATCH; @@ -426,7 +426,7 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu) do { if (new_mtu > qdisc_dev(q)->mtu) return -EINVAL; - } while ((q=NEXT_SLAVE(q)) != m->slaves); + } while ((q = NEXT_SLAVE(q)) != m->slaves); } dev->mtu = new_mtu; -- cgit v1.1 From 28a51ba59a1a983d63d4775e9bb8230fe0fb3b29 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 20 Jan 2011 10:23:26 +0100 Subject: netfilter: do not omit re-route check on NF_QUEUE verdict ret != NF_QUEUE only works in the "--queue-num 0" case; for queues > 0 the test should be '(ret & NF_VERDICT_MASK) != NF_QUEUE'. However, NF_QUEUE no longer DROPs the skb unconditionally if queueing fails (due to NF_VERDICT_FLAG_QUEUE_BYPASS verdict flag), so the re-route test should also be performed if this flag is set in the verdict. The full test would then look something like && ((ret & NF_VERDICT_MASK) == NF_QUEUE && (ret & NF_VERDICT_FLAG_QUEUE_BYPASS)) This is rather ugly, so just remove the NF_QUEUE test altogether. The only effect is that we might perform an unnecessary route lookup in the NF_QUEUE case. ip6table_mangle did not have such a check. 
Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/iptable_mangle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 294a2a3..aef5d1f 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -60,7 +60,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out) ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out, dev_net(out)->ipv4.iptable_mangle); /* Reroute for ANY change. */ - if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { + if (ret != NF_DROP && ret != NF_STOLEN) { iph = ip_hdr(skb); if (iph->saddr != saddr || -- cgit v1.1 From 41a7cab6d329039d614ca5f0f87aff2dfd90637c Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Thu, 20 Jan 2011 15:49:52 +0100 Subject: netfilter: nf_nat: place conntrack in source hash after SNAT is done If SNAT isn't done, the wrong info may be obtained by the other cts. As the filter table is after the DNAT table, the packets dropped in the filter table also bother the bysource hash table. Signed-off-by: Changli Gao Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_nat_core.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 3002c04..21bcf47 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -221,7 +221,14 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, manips not an issue.
*/ if (maniptype == IP_NAT_MANIP_SRC && !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { - if (find_appropriate_src(net, zone, orig_tuple, tuple, range)) { + /* try the original tuple first */ + if (in_range(orig_tuple, range)) { + if (!nf_nat_used_tuple(orig_tuple, ct)) { + *tuple = *orig_tuple; + return; + } + } else if (find_appropriate_src(net, zone, orig_tuple, tuple, + range)) { pr_debug("get_unique_tuple: Found current src map\n"); if (!nf_nat_used_tuple(tuple, ct)) return; @@ -266,7 +273,6 @@ nf_nat_setup_info(struct nf_conn *ct, struct net *net = nf_ct_net(ct); struct nf_conntrack_tuple curr_tuple, new_tuple; struct nf_conn_nat *nat; - int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK); /* nat helper or nfctnetlink also setup binding */ nat = nfct_nat(ct); @@ -306,8 +312,7 @@ nf_nat_setup_info(struct nf_conn *ct, ct->status |= IPS_DST_NAT; } - /* Place in source hash if this is the first time. */ - if (have_to_hash) { + if (maniptype == IP_NAT_MANIP_SRC) { unsigned int srchash; srchash = hash_by_src(net, nf_ct_zone(ct), @@ -535,7 +540,7 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct) if (nat == NULL || nat->ct == NULL) return; - NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK); + NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE); spin_lock_bh(&nf_nat_lock); hlist_del_rcu(&nat->bysource); @@ -548,11 +553,10 @@ static void nf_nat_move_storage(void *new, void *old) struct nf_conn_nat *old_nat = old; struct nf_conn *ct = old_nat->ct; - if (!ct || !(ct->status & IPS_NAT_DONE_MASK)) + if (!ct || !(ct->status & IPS_SRC_NAT_DONE)) return; spin_lock_bh(&nf_nat_lock); - new_nat->ct = ct; hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); spin_unlock_bh(&nf_nat_lock); } -- cgit v1.1 From bced94ed5efad836859d9426f37f48d46218e99a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Jan 2011 21:00:38 +0100 Subject: netfilter: add a missing include in nf_conntrack_reasm.c After commit ae90bdeaeac6b (netfilter: fix compilation when conntrack is 
disabled but tproxy is enabled) we have the following warnings: net/ipv6/netfilter/nf_conntrack_reasm.c:520:16: warning: symbol 'nf_ct_frag6_gather' was not declared. Should it be static? net/ipv6/netfilter/nf_conntrack_reasm.c:591:6: warning: symbol 'nf_ct_frag6_output' was not declared. Should it be static? net/ipv6/netfilter/nf_conntrack_reasm.c:612:5: warning: symbol 'nf_ct_frag6_init' was not declared. Should it be static? net/ipv6/netfilter/nf_conntrack_reasm.c:640:6: warning: symbol 'nf_ct_frag6_cleanup' was not declared. Should it be static? Fix this by including net/netfilter/ipv6/nf_defrag_ipv6.h Signed-off-by: Eric Dumazet CC: KOVACS Krisztian Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/nf_conntrack_reasm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 66e003e..0857272 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -45,6 +45,7 @@ #include #include #include +#include struct nf_ct_frag6_skb_cb -- cgit v1.1 From 3fbd8758b027995b677046dae46f9b41ea88c88f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Jan 2011 21:23:22 +0000 Subject: net: dev_close_many() is static Signed-off-by: Eric Dumazet CC: Octavian Purdila Reviewed-by: Octavian Purdila Signed-off-by: David S.
Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 8b1d886..a4ccd47 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1285,7 +1285,7 @@ static int __dev_close(struct net_device *dev) return __dev_close_many(&single); } -int dev_close_many(struct list_head *head) +static int dev_close_many(struct list_head *head) { struct net_device *dev, *tmp; LIST_HEAD(tmp_list); -- cgit v1.1 From 817fb15dfd988d8dda916ee04fa506f0c466b9d6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Jan 2011 00:14:58 +0000 Subject: net_sched: sfq: allow divisor to be a parameter SFQ currently uses a 1024 slots hash table, and its internal structure (sfq_sched_data) allocation needs order-1 page on x86_64 Allow tc command to specify a divisor value (hash table size), between 1 and 65536. If no value is provided, assume the 1024 default size. This allows admins to setup smaller (or bigger) SFQ for specific needs. This also brings back sfq_sched_data allocations to order-0 ones, saving 3KB per SFQ qdisc. Jesper uses ~55.000 SFQ in one machine, this patch should free 165 MB of memory. Signed-off-by: Eric Dumazet CC: Patrick McHardy CC: Jesper Dangaard Brouer CC: Jarek Poplawski CC: Jamal Hadi Salim CC: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/sched/sch_sfq.c | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 54a36f4..156ad30 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -76,7 +77,8 @@ #define SFQ_DEPTH 128 /* max number of packets per flow */ #define SFQ_SLOTS 128 /* max number of flows */ #define SFQ_EMPTY_SLOT 255 -#define SFQ_HASH_DIVISOR 1024 +#define SFQ_DEFAULT_HASH_DIVISOR 1024 + /* We use 16 bits to store allot, and want to handle packets up to 64K * Scale allot by 8 (1<<3) so that no overflow occurs. */ @@ -112,7 +114,7 @@ struct sfq_sched_data { int perturb_period; unsigned int quantum; /* Allotment per round: MUST BE >= MTU */ int limit; - + unsigned int divisor; /* number of slots in hash table */ /* Variables */ struct tcf_proto *filter_list; struct timer_list perturb_timer; @@ -120,7 +122,7 @@ struct sfq_sched_data { sfq_index cur_depth; /* depth of longest slot */ unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ struct sfq_slot *tail; /* current slot in round */ - sfq_index ht[SFQ_HASH_DIVISOR]; /* Hash table */ + sfq_index *ht; /* Hash table (divisor slots) */ struct sfq_slot slots[SFQ_SLOTS]; struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */ }; @@ -137,7 +139,7 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index static unsigned int sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1) { - return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1); + return jhash_2words(h, h1, q->perturbation) & (q->divisor - 1); } static unsigned int sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) @@ -201,7 +203,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, if (TC_H_MAJ(skb->priority) == sch->handle && TC_H_MIN(skb->priority) > 0 && - 
TC_H_MIN(skb->priority) <= SFQ_HASH_DIVISOR) + TC_H_MIN(skb->priority) <= q->divisor) return TC_H_MIN(skb->priority); if (!q->filter_list) @@ -219,7 +221,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, return 0; } #endif - if (TC_H_MIN(res.classid) <= SFQ_HASH_DIVISOR) + if (TC_H_MIN(res.classid) <= q->divisor) return TC_H_MIN(res.classid); } return 0; @@ -496,7 +498,11 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) q->perturb_period = ctl->perturb_period * HZ; if (ctl->limit) q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); - + if (ctl->divisor) { + if (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536) + return -EINVAL; + q->divisor = ctl->divisor; + } qlen = sch->q.qlen; while (sch->q.qlen > q->limit) sfq_drop(sch); @@ -514,15 +520,13 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) static int sfq_init(struct Qdisc *sch, struct nlattr *opt) { struct sfq_sched_data *q = qdisc_priv(sch); + size_t sz; int i; q->perturb_timer.function = sfq_perturbation; q->perturb_timer.data = (unsigned long)sch; init_timer_deferrable(&q->perturb_timer); - for (i = 0; i < SFQ_HASH_DIVISOR; i++) - q->ht[i] = SFQ_EMPTY_SLOT; - for (i = 0; i < SFQ_DEPTH; i++) { q->dep[i].next = i + SFQ_SLOTS; q->dep[i].prev = i + SFQ_SLOTS; @@ -531,6 +535,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) q->limit = SFQ_DEPTH - 1; q->cur_depth = 0; q->tail = NULL; + q->divisor = SFQ_DEFAULT_HASH_DIVISOR; if (opt == NULL) { q->quantum = psched_mtu(qdisc_dev(sch)); q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); @@ -542,6 +547,15 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) return err; } + sz = sizeof(q->ht[0]) * q->divisor; + q->ht = kmalloc(sz, GFP_KERNEL); + if (!q->ht && sz > PAGE_SIZE) + q->ht = vmalloc(sz); + if (!q->ht) + return -ENOMEM; + for (i = 0; i < q->divisor; i++) + q->ht[i] = SFQ_EMPTY_SLOT; + for (i = 0; i < SFQ_SLOTS; i++) { slot_queue_init(&q->slots[i]); sfq_link(q, i); @@ -556,6 +570,10 @@ 
static void sfq_destroy(struct Qdisc *sch) tcf_destroy_chain(&q->filter_list); q->perturb_period = 0; del_timer_sync(&q->perturb_timer); + if (is_vmalloc_addr(q->ht)) + vfree(q->ht); + else + kfree(q->ht); } static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) @@ -568,7 +586,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) opt.perturb_period = q->perturb_period / HZ; opt.limit = q->limit; - opt.divisor = SFQ_HASH_DIVISOR; + opt.divisor = q->divisor; opt.flows = q->limit; NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); @@ -646,7 +664,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) if (arg->stop) return; - for (i = 0; i < SFQ_HASH_DIVISOR; i++) { + for (i = 0; i < q->divisor; i++) { if (q->ht[i] == SFQ_EMPTY_SLOT || arg->count < arg->skip) { arg->count++; -- cgit v1.1 From fd245a4adb5288eac37250875f237c40a20a1944 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Jan 2011 05:27:16 +0000 Subject: net_sched: move TCQ_F_THROTTLED flag In commit 371121057607e (net: QDISC_STATE_RUNNING dont need atomic bit ops) I moved QDISC_STATE_RUNNING flag to __state container, located in the cache line containing qdisc lock and often dirtied fields. I now move TCQ_F_THROTTLED bit too, so that we let first cache line read mostly, and shared by all cpus. This should speedup HTB/CBQ for example. Not using test_bit()/__clear_bit()/__test_and_set_bit allows to use an "unsigned int" for __state container, reducing by 8 bytes Qdisc size. Introduce helpers to hide implementation details. Signed-off-by: Eric Dumazet CC: Patrick McHardy CC: Jesper Dangaard Brouer CC: Jarek Poplawski CC: Jamal Hadi Salim CC: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/sched/sch_api.c | 6 +++--- net/sched/sch_cbq.c | 6 +++--- net/sched/sch_hfsc.c | 2 +- net/sched/sch_htb.c | 4 ++-- net/sched/sch_netem.c | 2 +- net/sched/sch_tbf.c | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 36ac0ec..374fcbe 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -473,7 +473,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, timer); - wd->qdisc->flags &= ~TCQ_F_THROTTLED; + qdisc_unthrottled(wd->qdisc); __netif_schedule(qdisc_root(wd->qdisc)); return HRTIMER_NORESTART; @@ -495,7 +495,7 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) &qdisc_root_sleeping(wd->qdisc)->state)) return; - wd->qdisc->flags |= TCQ_F_THROTTLED; + qdisc_throttled(wd->qdisc); time = ktime_set(0, 0); time = ktime_add_ns(time, PSCHED_TICKS2NS(expires)); hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); @@ -505,7 +505,7 @@ EXPORT_SYMBOL(qdisc_watchdog_schedule); void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) { hrtimer_cancel(&wd->timer); - wd->qdisc->flags &= ~TCQ_F_THROTTLED; + qdisc_unthrottled(wd->qdisc); } EXPORT_SYMBOL(qdisc_watchdog_cancel); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 4aaf44c..25ed522 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -351,7 +351,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl) { int toplevel = q->toplevel; - if (toplevel > cl->level && !(cl->q->flags & TCQ_F_THROTTLED)) { + if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) { psched_time_t now; psched_tdiff_t incr; @@ -625,7 +625,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS); } - sch->flags &= ~TCQ_F_THROTTLED; + qdisc_unthrottled(sch); __netif_schedule(qdisc_root(sch)); return HRTIMER_NORESTART; } @@ -974,7 
+974,7 @@ cbq_dequeue(struct Qdisc *sch) skb = cbq_dequeue_1(sch); if (skb) { sch->q.qlen--; - sch->flags &= ~TCQ_F_THROTTLED; + qdisc_unthrottled(sch); return skb; } diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index dea4009..b632d92 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1664,7 +1664,7 @@ hfsc_dequeue(struct Qdisc *sch) set_passive(cl); } - sch->flags &= ~TCQ_F_THROTTLED; + qdisc_unthrottled(sch); sch->q.qlen--; return skb; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 3e86fd3..39db75c 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -865,7 +865,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) /* try to dequeue direct packets as high prio (!) to minimize cpu work */ skb = __skb_dequeue(&q->direct_queue); if (skb != NULL) { - sch->flags &= ~TCQ_F_THROTTLED; + qdisc_unthrottled(sch); sch->q.qlen--; return skb; } @@ -901,7 +901,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) skb = htb_dequeue_tree(q, prio, level); if (likely(skb != NULL)) { sch->q.qlen--; - sch->flags &= ~TCQ_F_THROTTLED; + qdisc_unthrottled(sch); goto fin; } } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index c2bbbe6..c26ef36 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -266,7 +266,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) struct netem_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; - if (sch->flags & TCQ_F_THROTTLED) + if (qdisc_is_throttled(sch)) return NULL; skb = q->qdisc->ops->peek(q->qdisc); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 475edfb..86c0166 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -185,7 +185,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch) q->tokens = toks; q->ptokens = ptoks; sch->q.qlen--; - sch->flags &= ~TCQ_F_THROTTLED; + qdisc_unthrottled(sch); return skb; } -- cgit v1.1 From a2da570d62fcb9e8816f6920e1ec02c706b289fa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Jan 2011 
03:48:19 +0000 Subject: net_sched: RCU conversion of stab This patch converts stab qdisc management to RCU, so that we can perform the qdisc_calculate_pkt_len() call before getting the qdisc lock. This shortens the lock's held time in __dev_xmit_skb(). This permits more qdiscs to get TCQ_F_CAN_BYPASS status, avoiding a lot of cache misses and so reducing latencies. Signed-off-by: Eric Dumazet CC: Patrick McHardy CC: Jesper Dangaard Brouer CC: Jarek Poplawski CC: Jamal Hadi Salim CC: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/dev.c | 8 +++++--- net/sched/sch_api.c | 26 +++++++++++++++++--------- net/sched/sch_generic.c | 2 +- 3 files changed, 23 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index a4ccd47..2730352 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2325,15 +2325,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, struct netdev_queue *txq) { spinlock_t *root_lock = qdisc_lock(q); - bool contended = qdisc_is_running(q); + bool contended; int rc; + qdisc_skb_cb(skb)->pkt_len = skb->len; + qdisc_calculate_pkt_len(skb, q); /* * Heuristic to force contended enqueues to serialize on a * separate lock before trying to get qdisc main lock. * This permits __QDISC_STATE_RUNNING owner to get the lock more often * and dequeue packets faster.
*/ + contended = qdisc_is_running(q); if (unlikely(contended)) spin_lock(&q->busylock); @@ -2351,7 +2354,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) skb_dst_force(skb); - qdisc_skb_cb(skb)->pkt_len = skb->len; qdisc_bstats_update(q, skb); if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { @@ -2366,7 +2368,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, rc = NET_XMIT_SUCCESS; } else { skb_dst_force(skb); - rc = qdisc_enqueue_root(skb, q); + rc = q->enqueue(skb, q) & NET_XMIT_MASK; if (qdisc_run_begin(q)) { if (unlikely(contended)) { spin_unlock(&q->busylock); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 374fcbe..1507415 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -398,6 +398,11 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) return stab; } +static void stab_kfree_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct qdisc_size_table, rcu)); +} + void qdisc_put_stab(struct qdisc_size_table *tab) { if (!tab) @@ -407,7 +412,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab) if (--tab->refcnt == 0) { list_del(&tab->list); - kfree(tab); + call_rcu_bh(&tab->rcu, stab_kfree_rcu); } spin_unlock(&qdisc_stab_lock); @@ -430,7 +435,7 @@ nla_put_failure: return -1; } -void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab) +void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab) { int pkt_len, slot; @@ -456,7 +461,7 @@ out: pkt_len = 1; qdisc_skb_cb(skb)->pkt_len = pkt_len; } -EXPORT_SYMBOL(qdisc_calculate_pkt_len); +EXPORT_SYMBOL(__qdisc_calculate_pkt_len); void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc) { @@ -835,7 +840,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, err = PTR_ERR(stab); goto err_out4; } - sch->stab = stab; + rcu_assign_pointer(sch->stab, stab); } if (tca[TCA_RATE]) { spinlock_t *root_lock; @@ 
-875,7 +880,7 @@ err_out4: * Any broken qdiscs that would require a ops->reset() here? * The qdisc was never in action so it shouldn't be necessary. */ - qdisc_put_stab(sch->stab); + qdisc_put_stab(rtnl_dereference(sch->stab)); if (ops->destroy) ops->destroy(sch); goto err_out3; @@ -883,7 +888,7 @@ err_out4: static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) { - struct qdisc_size_table *stab = NULL; + struct qdisc_size_table *ostab, *stab = NULL; int err = 0; if (tca[TCA_OPTIONS]) { @@ -900,8 +905,9 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) return PTR_ERR(stab); } - qdisc_put_stab(sch->stab); - sch->stab = stab; + ostab = rtnl_dereference(sch->stab); + rcu_assign_pointer(sch->stab, stab); + qdisc_put_stab(ostab); if (tca[TCA_RATE]) { /* NB: ignores errors from replace_estimator @@ -1180,6 +1186,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); struct gnet_dump d; + struct qdisc_size_table *stab; nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); tcm = NLMSG_DATA(nlh); @@ -1195,7 +1202,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, goto nla_put_failure; q->qstats.qlen = q->q.qlen; - if (q->stab && qdisc_dump_stab(skb, q->stab) < 0) + stab = rtnl_dereference(q->stab); + if (stab && qdisc_dump_stab(skb, stab) < 0) goto nla_put_failure; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 2f1cb62..cc17e79 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -632,7 +632,7 @@ void qdisc_destroy(struct Qdisc *qdisc) #ifdef CONFIG_NET_SCHED qdisc_list_del(qdisc); - qdisc_put_stab(qdisc->stab); + qdisc_put_stab(rtnl_dereference(qdisc->stab)); #endif gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); if (ops->reset) -- cgit v1.1 From 753ea8e96258d87be1951083b5c4a368524515f1 Mon Sep 17 00:00:00 2001 
From: Eric Dumazet Date: Thu, 20 Jan 2011 07:16:24 +0000 Subject: net: ipv6: sit: fix rcu annotations Fix minor __rcu annotations and remove sparse warnings Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/sit.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 8ce38f1..b1599a3 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -412,7 +412,7 @@ static void prl_list_destroy_rcu(struct rcu_head *head) p = container_of(head, struct ip_tunnel_prl_entry, rcu_head); do { - n = p->next; + n = rcu_dereference_protected(p->next, 1); kfree(p); p = n; } while (p); @@ -421,15 +421,17 @@ static void prl_list_destroy_rcu(struct rcu_head *head) static int ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) { - struct ip_tunnel_prl_entry *x, **p; + struct ip_tunnel_prl_entry *x; + struct ip_tunnel_prl_entry __rcu **p; int err = 0; ASSERT_RTNL(); if (a && a->addr != htonl(INADDR_ANY)) { - for (p = &t->prl; *p; p = &(*p)->next) { - if ((*p)->addr == a->addr) { - x = *p; + for (p = &t->prl; + (x = rtnl_dereference(*p)) != NULL; + p = &x->next) { + if (x->addr == a->addr) { *p = x->next; call_rcu(&x->rcu_head, prl_entry_destroy_rcu); t->prl_count--; @@ -438,9 +440,9 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) } err = -ENXIO; } else { - if (t->prl) { + x = rtnl_dereference(t->prl); + if (x) { t->prl_count = 0; - x = t->prl; call_rcu(&x->rcu_head, prl_list_destroy_rcu); t->prl = NULL; } @@ -1179,7 +1181,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; dev_hold(dev); - sitn->tunnels_wc[0] = tunnel; + rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); return 0; } @@ -1196,11 +1198,12 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea for (prio = 1; prio < 4; prio++) { int h; for (h = 0; h < HASH_SIZE; h++) { - struct ip_tunnel *t = 
sitn->tunnels[prio][h]; + struct ip_tunnel *t; + t = rtnl_dereference(sitn->tunnels[prio][h]); while (t != NULL) { unregister_netdevice_queue(t->dev, head); - t = t->next; + t = rtnl_dereference(t->next); } } } -- cgit v1.1 From 6193d2be290990b789021e06fa770ecb45319f2d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Jan 2011 22:02:47 +0000 Subject: neigh: __rcu annotations fix some minor issues and sparse (__rcu) warnings Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/neighbour.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 60a9029..799f06e 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -316,7 +316,7 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries) { size_t size = entries * sizeof(struct neighbour *); struct neigh_hash_table *ret; - struct neighbour **buckets; + struct neighbour __rcu **buckets; ret = kmalloc(sizeof(*ret), GFP_ATOMIC); if (!ret) @@ -324,14 +324,14 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries) if (size <= PAGE_SIZE) buckets = kzalloc(size, GFP_ATOMIC); else - buckets = (struct neighbour **) + buckets = (struct neighbour __rcu **) __get_free_pages(GFP_ATOMIC | __GFP_ZERO, get_order(size)); if (!buckets) { kfree(ret); return NULL; } - rcu_assign_pointer(ret->hash_buckets, buckets); + ret->hash_buckets = buckets; ret->hash_mask = entries - 1; get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd)); return ret; @@ -343,7 +343,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head) struct neigh_hash_table, rcu); size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *); - struct neighbour **buckets = nht->hash_buckets; + struct neighbour __rcu **buckets = nht->hash_buckets; if (size <= PAGE_SIZE) kfree(buckets); @@ -1540,7 +1540,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) panic("cannot create neighbour proc dir entry"); 
#endif - tbl->nht = neigh_hash_alloc(8); + RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(8)); phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *); tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL); @@ -1602,7 +1602,8 @@ int neigh_table_clear(struct neigh_table *tbl) } write_unlock(&neigh_tbl_lock); - call_rcu(&tbl->nht->rcu, neigh_hash_free_rcu); + call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu, + neigh_hash_free_rcu); tbl->nht = NULL; kfree(tbl->phash_buckets); -- cgit v1.1 From f2eda47df46f9953fc8a4fec820f34d539a8efbb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Jan 2011 07:37:53 +0000 Subject: ipv6: raw: rcu annotations Remove sparse warnings, using a function typedef to be able to use __rcu annotation on mh_filter pointer. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/raw.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 86c3952..2bc6cd7 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -123,18 +123,18 @@ static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) } #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) -static int (*mh_filter)(struct sock *sock, struct sk_buff *skb); +typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb); -int rawv6_mh_filter_register(int (*filter)(struct sock *sock, - struct sk_buff *skb)) +static mh_filter_t __rcu *mh_filter __read_mostly; + +int rawv6_mh_filter_register(mh_filter_t filter) { rcu_assign_pointer(mh_filter, filter); return 0; } EXPORT_SYMBOL(rawv6_mh_filter_register); -int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock, - struct sk_buff *skb)) +int rawv6_mh_filter_unregister(mh_filter_t filter) { rcu_assign_pointer(mh_filter, NULL); synchronize_rcu(); @@ -192,10 +192,10 @@ static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) * policy is placed in rawv6_rcv() because it is * required for each socket. 
*/ - int (*filter)(struct sock *sock, struct sk_buff *skb); + mh_filter_t *filter; filter = rcu_dereference(mh_filter); - filtered = filter ? filter(sk, skb) : 0; + filtered = filter ? (*filter)(sk, skb) : 0; break; } #endif -- cgit v1.1 From ffa934f192c8381061242eb170419266ef229902 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 20 Jan 2011 03:00:42 +0000 Subject: rtnetlink: fix link attribute validation with IFLA_GROUP rtnl_group_changelink() is invoked by rtnl_newlink() before the link attributes have been validated. Additionally the group changes are performed even if NLM_F_CREATE is specified and a new link is created, while more reasonable semantics would be to set the group value on the newly created link. Fix both problems by moving the rtnl_group_changelink() invocation down to the handling of non-existent links without NLM_F_CREATE and add a dev_set_group() call to rtnl_create_link(). Signed-off-by: Patrick McHardy Acked-by: Vlad Dogaru Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a0b2eeb..310eb80 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1548,6 +1548,8 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net, set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); if (tb[IFLA_LINKMODE]) dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); + if (tb[IFLA_GROUP]) + dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP])); return dev; @@ -1606,10 +1608,6 @@ replay: else { if (ifname[0]) dev = __dev_get_by_name(net, ifname); - else if (tb[IFLA_GROUP]) - return rtnl_group_changelink(net, - nla_get_u32(tb[IFLA_GROUP]), - ifm, tb); else dev = NULL; } @@ -1676,8 +1674,13 @@ replay: return do_setlink(dev, ifm, tb, ifname, modified); } - if (!(nlh->nlmsg_flags & NLM_F_CREATE)) + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { + if (ifm->ifi_index == 0 && tb[IFLA_GROUP]) + return
rtnl_group_changelink(net, + nla_get_u32(tb[IFLA_GROUP]), + ifm, tb); return -ENODEV; + } if (ifm->ifi_index) return -EOPNOTSUPP; -- cgit v1.1 From b305dae488193b65cfa80e1c06c0aa0ce60005a9 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Sat, 8 Jan 2011 10:30:54 -0800 Subject: mac80211: Fix skb-copy failure debug message. This particular error isn't about multicast. Signed-off-by: Ben Greear Signed-off-by: John W. Linville --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 1236710..f36d70f 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2711,7 +2711,7 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, if (!skb) { if (net_ratelimit()) wiphy_debug(local->hw.wiphy, - "failed to copy multicast frame for %s\n", + "failed to copy skb for %s\n", sdata->name); return true; } -- cgit v1.1 From 59eb21a6504731fc16db4cf9463065dd61093e08 Mon Sep 17 00:00:00 2001 From: Bruno Randolf Date: Mon, 17 Jan 2011 13:37:28 +0900 Subject: cfg80211: Extend channel to frequency mapping for 802.11j Extend channel to frequency mapping for 802.11j Japan 4.9GHz band, according to IEEE802.11 section 17.3.8.3.2 and Annex J. Because there are now overlapping channel numbers in the 2GHz and 5GHz band we can't map from channel to frequency without knowing the band. This is no problem as in most contexts we know the band. In places where we don't know the band (and WEXT compatibility) we assume the 2GHz band for channels below 14. This patch does not implement all channel to frequency mappings defined in 802.11, it's just an extension for 802.11j 20MHz channels. 5MHz and 10MHz channels as well as 802.11y channels have been omitted. The following drivers have been updated to reflect the API changes: iwl-3945, iwl-agn, iwmc3200wifi, libertas, mwl8k, rt2x00, wl1251, wl12xx. The drivers have been compile-tested only. 
Signed-off-by: Bruno Randolf Signed-off-by: Brian Prodoehl Acked-by: Luciano Coelho Signed-off-by: John W. Linville --- net/mac80211/ibss.c | 3 ++- net/mac80211/mesh.c | 2 +- net/mac80211/mlme.c | 8 +++++--- net/mac80211/scan.c | 3 ++- net/wireless/reg.c | 6 +++--- net/wireless/util.c | 36 ++++++++++++++++++++++-------------- net/wireless/wext-compat.c | 5 ++++- 7 files changed, 39 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 53c7077..775fb63 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -270,7 +270,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, enum ieee80211_band band = rx_status->band; if (elems->ds_params && elems->ds_params_len == 1) - freq = ieee80211_channel_to_frequency(elems->ds_params[0]); + freq = ieee80211_channel_to_frequency(elems->ds_params[0], + band); else freq = rx_status->freq; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 2563fd1..2a57cc0 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -574,7 +574,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, &elems); if (elems.ds_params && elems.ds_params_len == 1) - freq = ieee80211_channel_to_frequency(elems.ds_params[0]); + freq = ieee80211_channel_to_frequency(elems.ds_params[0], band); else freq = rx_status->freq; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index eecbb1f..3221069 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -176,7 +176,7 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, /* check that channel matches the right operating channel */ if (local->hw.conf.channel->center_freq != - ieee80211_channel_to_frequency(hti->control_chan)) + ieee80211_channel_to_frequency(hti->control_chan, sband->band)) enable_ht = false; if (enable_ht) { @@ -429,7 +429,8 @@ void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, container_of((void *)bss, struct cfg80211_bss, 
priv); struct ieee80211_channel *new_ch; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - int new_freq = ieee80211_channel_to_frequency(sw_elem->new_ch_num); + int new_freq = ieee80211_channel_to_frequency(sw_elem->new_ch_num, + cbss->channel->band); ASSERT_MGD_MTX(ifmgd); @@ -1519,7 +1520,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, } if (elems->ds_params && elems->ds_params_len == 1) - freq = ieee80211_channel_to_frequency(elems->ds_params[0]); + freq = ieee80211_channel_to_frequency(elems->ds_params[0], + rx_status->band); else freq = rx_status->freq; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index fb274db..1ef73be 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -196,7 +196,8 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) ieee802_11_parse_elems(elements, skb->len - baselen, &elems); if (elems.ds_params && elems.ds_params_len == 1) - freq = ieee80211_channel_to_frequency(elems.ds_params[0]); + freq = ieee80211_channel_to_frequency(elems.ds_params[0], + rx_status->band); else freq = rx_status->freq; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 37693b6..c565689 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1801,9 +1801,9 @@ void regulatory_hint_disconnect(void) static bool freq_is_chan_12_13_14(u16 freq) { - if (freq == ieee80211_channel_to_frequency(12) || - freq == ieee80211_channel_to_frequency(13) || - freq == ieee80211_channel_to_frequency(14)) + if (freq == ieee80211_channel_to_frequency(12, IEEE80211_BAND_2GHZ) || + freq == ieee80211_channel_to_frequency(13, IEEE80211_BAND_2GHZ) || + freq == ieee80211_channel_to_frequency(14, IEEE80211_BAND_2GHZ)) return true; return false; } diff --git a/net/wireless/util.c b/net/wireless/util.c index 7620ae2..4ed065d 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -29,29 +29,37 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband, } 
EXPORT_SYMBOL(ieee80211_get_response_rate); -int ieee80211_channel_to_frequency(int chan) +int ieee80211_channel_to_frequency(int chan, enum ieee80211_band band) { - if (chan < 14) - return 2407 + chan * 5; - - if (chan == 14) - return 2484; - - /* FIXME: 802.11j 17.3.8.3.2 */ - return (chan + 1000) * 5; + /* see 802.11 17.3.8.3.2 and Annex J + * there are overlapping channel numbers in 5GHz and 2GHz bands */ + if (band == IEEE80211_BAND_5GHZ) { + if (chan >= 182 && chan <= 196) + return 4000 + chan * 5; + else + return 5000 + chan * 5; + } else { /* IEEE80211_BAND_2GHZ */ + if (chan == 14) + return 2484; + else if (chan < 14) + return 2407 + chan * 5; + else + return 0; /* not supported */ + } } EXPORT_SYMBOL(ieee80211_channel_to_frequency); int ieee80211_frequency_to_channel(int freq) { + /* see 802.11 17.3.8.3.2 and Annex J */ if (freq == 2484) return 14; - - if (freq < 2484) + else if (freq < 2484) return (freq - 2407) / 5; - - /* FIXME: 802.11j 17.3.8.3.2 */ - return freq/5 - 1000; + else if (freq >= 4910 && freq <= 4980) + return (freq - 4000) / 5; + else + return (freq - 5000) / 5; } EXPORT_SYMBOL(ieee80211_frequency_to_channel); diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 3e5dbd4..7f1f4ec 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -267,9 +267,12 @@ int cfg80211_wext_freq(struct wiphy *wiphy, struct iw_freq *freq) * -EINVAL for impossible things. 
*/ if (freq->e == 0) { + enum ieee80211_band band = IEEE80211_BAND_2GHZ; if (freq->m < 0) return 0; - return ieee80211_channel_to_frequency(freq->m); + if (freq->m > 14) + band = IEEE80211_BAND_5GHZ; + return ieee80211_channel_to_frequency(freq->m, band); } else { int i, div = 1000000; for (i = 0; i < freq->e; i++) -- cgit v1.1 From bb134d2298b49f50cf6d9388410fba96272905dc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Jan 2011 19:18:08 +0000 Subject: net: netif_setup_tc() is static Signed-off-by: Eric Dumazet Acked-by: John Fastabend Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 2730352..47d3d78 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1605,7 +1605,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) * expected that drivers will fix this mapping if they can before * calling netif_set_real_num_tx_queues. */ -void netif_setup_tc(struct net_device *dev, unsigned int txq) +static void netif_setup_tc(struct net_device *dev, unsigned int txq) { int i; struct netdev_tc_txq *tc = &dev->tc_to_txq[0]; -- cgit v1.1 From 23624935e0c4b04730ed8d7d21f0cd25b2c2cda1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Jan 2011 16:26:09 -0800 Subject: net_sched: TCQ_F_CAN_BYPASS generalization Now qdisc stab is handled before TCQ_F_CAN_BYPASS test in __dev_xmit_skb(), we can generalize TCQ_F_CAN_BYPASS to other qdiscs than pfifo_fast : pfifo, bfifo, pfifo_head_drop and sfq SFQ is special because it can have external classifiers, and in these cases, we cannot bypass queue discipline (packet could be dropped by classifier) without admin asking it, or further changes. Its worth doing this, especially for SFQ, avoiding dirtying memory in case no packets are already waiting in queue. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/sched/sch_fifo.c | 13 ++++++++++++- net/sched/sch_generic.c | 5 ++--- net/sched/sch_mq.c | 1 - net/sched/sch_mqprio.c | 1 - net/sched/sch_sfq.c | 6 ++++++ 5 files changed, 20 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index b3075f8..f7290d2 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -64,11 +64,13 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch) static int fifo_init(struct Qdisc *sch, struct nlattr *opt) { struct fifo_sched_data *q = qdisc_priv(sch); + bool bypass; + bool is_bfifo = sch->ops == &bfifo_qdisc_ops; if (opt == NULL) { u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1; - if (sch->ops == &bfifo_qdisc_ops) + if (is_bfifo) limit *= psched_mtu(qdisc_dev(sch)); q->limit = limit; @@ -81,6 +83,15 @@ static int fifo_init(struct Qdisc *sch, struct nlattr *opt) q->limit = ctl->limit; } + if (is_bfifo) + bypass = q->limit >= psched_mtu(qdisc_dev(sch)); + else + bypass = q->limit >= 1; + + if (bypass) + sch->flags |= TCQ_F_CAN_BYPASS; + else + sch->flags &= ~TCQ_F_CAN_BYPASS; return 0; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index cc17e79..0da09d5 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -527,6 +527,8 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt) for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) skb_queue_head_init(band2list(priv, prio)); + /* Can by-pass the queue discipline */ + qdisc->flags |= TCQ_F_CAN_BYPASS; return 0; } @@ -691,9 +693,6 @@ static void attach_one_default_qdisc(struct net_device *dev, netdev_info(dev, "activation failed\n"); return; } - - /* Can by-pass the queue discipline for default qdisc */ - qdisc->flags |= TCQ_F_CAN_BYPASS; } dev_queue->qdisc_sleeping = qdisc; } diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index ecc302f..ec5cbc8 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -61,7 +61,6 @@ static int mq_init(struct Qdisc 
*sch, struct nlattr *opt) TC_H_MIN(ntx + 1))); if (qdisc == NULL) goto err; - qdisc->flags |= TCQ_F_CAN_BYPASS; priv->qdiscs[ntx] = qdisc; } diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 8620c65..fbc6f53 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -130,7 +130,6 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) err = -ENOMEM; goto err; } - qdisc->flags |= TCQ_F_CAN_BYPASS; priv->qdiscs[i] = qdisc; } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 156ad30..fdba52a 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -560,6 +560,10 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) slot_queue_init(&q->slots[i]); sfq_link(q, i); } + if (q->limit >= 1) + sch->flags |= TCQ_F_CAN_BYPASS; + else + sch->flags &= ~TCQ_F_CAN_BYPASS; return 0; } @@ -611,6 +615,8 @@ static unsigned long sfq_get(struct Qdisc *sch, u32 classid) static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent, u32 classid) { + /* we cannot bypass queue discipline anymore */ + sch->flags &= ~TCQ_F_CAN_BYPASS; return 0; } -- cgit v1.1 From 091bb34c143674d37a59b2d4857534f7106c5d7d Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Fri, 21 Jan 2011 18:02:13 +0800 Subject: netfilter: ipvs: fix compiler warnings Fix compiler warnings when no transport protocol load balancing support is configured. 
[horms@verge.net.au: removed spurious __ip_vs_cleanup() clean-up hunk] Signed-off-by: Changli Gao Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ctl.c | 4 ++++ net/netfilter/ipvs/ip_vs_proto.c | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 09ca2ce..68b8033 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2062,7 +2062,9 @@ static const struct file_operations ip_vs_stats_percpu_fops = { */ static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u) { +#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) struct ip_vs_proto_data *pd; +#endif IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n", u->tcp_timeout, @@ -2405,7 +2407,9 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, static inline void __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u) { +#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) struct ip_vs_proto_data *pd; +#endif #ifdef CONFIG_IP_VS_PROTO_TCP pd = ip_vs_proto_data_get(net, IPPROTO_TCP); diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 6ac986c..17484a4 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -60,6 +60,9 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp) return 0; } +#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) || \ + defined(CONFIG_IP_VS_PROTO_SCTP) || defined(CONFIG_IP_VS_PROTO_AH) || \ + defined(CONFIG_IP_VS_PROTO_ESP) /* * register an ipvs protocols netns related data */ @@ -85,6 +88,7 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp) return 0; } +#endif /* * unregister an ipvs protocol -- cgit v1.1 From 4b3fd57138c969dd940651fadf90db627254edbf Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sat, 22 Jan 2011 13:48:01 +1100 Subject: IPVS:
Change sock_create_kernel() to __sock_create() The recent netns changes omitted to change sock_create_kernel() to __sock_create() in ip_vs_sync.c The effect of this is that the interface will be selected in the root-namespace, from my point of view it's a major bug. Reported-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_sync.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index d1adf98..d5a6e64 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1305,7 +1305,7 @@ static struct socket *make_send_sock(struct net *net) int result; /* First create a socket */ - result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); return ERR_PTR(result); @@ -1351,7 +1351,7 @@ static struct socket *make_receive_sock(struct net *net) int result; /* First create a socket */ - result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); return ERR_PTR(result); -- cgit v1.1 From c445477d74ab3779d1386ab797fbb9b628eb9f64 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 19 Jan 2011 11:03:53 +0000 Subject: net: RPS: Enable hardware acceleration of RFS Allow drivers for multiqueue hardware with flow filter tables to accelerate RFS. The driver must: 1. Set net_device::rx_cpu_rmap to a cpu_rmap of the RX completion IRQs (in queue order). This will provide a mapping from CPUs to the queues for which completions are handled nearest to them. 2. Implement net_device_ops::ndo_rx_flow_steer. 
This operation adds or replaces a filter steering the given flow to the given RX queue, if possible. 3. Periodically remove filters for which rps_may_expire_flow() returns true. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/Kconfig | 6 ++++ net/core/dev.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 97 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index 7284062..79cabf1 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -221,6 +221,12 @@ config RPS depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS default y +config RFS_ACCEL + boolean + depends on RPS && GENERIC_HARDIRQS + select CPU_RMAP + default y + config XPS boolean depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS diff --git a/net/core/dev.c b/net/core/dev.c index d162ba8..aa76147 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -132,6 +132,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -2588,6 +2589,53 @@ EXPORT_SYMBOL(__skb_get_rxhash); struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; EXPORT_SYMBOL(rps_sock_flow_table); +static struct rps_dev_flow * +set_rps_cpu(struct net_device *dev, struct sk_buff *skb, + struct rps_dev_flow *rflow, u16 next_cpu) +{ + u16 tcpu; + + tcpu = rflow->cpu = next_cpu; + if (tcpu != RPS_NO_CPU) { +#ifdef CONFIG_RFS_ACCEL + struct netdev_rx_queue *rxqueue; + struct rps_dev_flow_table *flow_table; + struct rps_dev_flow *old_rflow; + u32 flow_id; + u16 rxq_index; + int rc; + + /* Should we steer this flow to a different hardware queue? 
*/ + if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap) + goto out; + rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu); + if (rxq_index == skb_get_rx_queue(skb)) + goto out; + + rxqueue = dev->_rx + rxq_index; + flow_table = rcu_dereference(rxqueue->rps_flow_table); + if (!flow_table) + goto out; + flow_id = skb->rxhash & flow_table->mask; + rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, + rxq_index, flow_id); + if (rc < 0) + goto out; + old_rflow = rflow; + rflow = &flow_table->flows[flow_id]; + rflow->cpu = next_cpu; + rflow->filter = rc; + if (old_rflow->filter == rflow->filter) + old_rflow->filter = RPS_NO_FILTER; + out: +#endif + rflow->last_qtail = + per_cpu(softnet_data, tcpu).input_queue_head; + } + + return rflow; +} + /* * get_rps_cpu is called from netif_receive_skb and returns the target * CPU from the RPS map of the receiving queue for a given skb. @@ -2658,12 +2706,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, if (unlikely(tcpu != next_cpu) && (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || ((int)(per_cpu(softnet_data, tcpu).input_queue_head - - rflow->last_qtail)) >= 0)) { - tcpu = rflow->cpu = next_cpu; - if (tcpu != RPS_NO_CPU) - rflow->last_qtail = per_cpu(softnet_data, - tcpu).input_queue_head; - } + rflow->last_qtail)) >= 0)) + rflow = set_rps_cpu(dev, skb, rflow, next_cpu); + if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { *rflowp = rflow; cpu = tcpu; @@ -2684,6 +2729,46 @@ done: return cpu; } +#ifdef CONFIG_RFS_ACCEL + +/** + * rps_may_expire_flow - check whether an RFS hardware filter may be removed + * @dev: Device on which the filter was set + * @rxq_index: RX queue index + * @flow_id: Flow ID passed to ndo_rx_flow_steer() + * @filter_id: Filter ID returned by ndo_rx_flow_steer() + * + * Drivers that implement ndo_rx_flow_steer() should periodically call + * this function for each installed filter and remove the filters for + * which it returns %true. 
+ */ +bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, + u32 flow_id, u16 filter_id) +{ + struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index; + struct rps_dev_flow_table *flow_table; + struct rps_dev_flow *rflow; + bool expire = true; + int cpu; + + rcu_read_lock(); + flow_table = rcu_dereference(rxqueue->rps_flow_table); + if (flow_table && flow_id <= flow_table->mask) { + rflow = &flow_table->flows[flow_id]; + cpu = ACCESS_ONCE(rflow->cpu); + if (rflow->filter == filter_id && cpu != RPS_NO_CPU && + ((int)(per_cpu(softnet_data, cpu).input_queue_head - + rflow->last_qtail) < + (int)(10 * flow_table->mask))) + expire = false; + } + rcu_read_unlock(); + return expire; +} +EXPORT_SYMBOL(rps_may_expire_flow); + +#endif /* CONFIG_RFS_ACCEL */ + /* Called from hardirq (IPI) context */ static void rps_trigger_softirq(void *data) { -- cgit v1.1 From 57422dc530115e427dff464cc0a32bcd0efb5008 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Sat, 22 Jan 2011 12:14:12 +0000 Subject: net: Move check of checksum features to netdev_fix_features() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mirosław Signed-off-by: David S.
Miller --- net/core/dev.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index aa76147..ad37418 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5215,6 +5215,23 @@ static void rollback_registered(struct net_device *dev) unsigned long netdev_fix_features(unsigned long features, const char *name) { + /* Fix illegal checksum combinations */ + if ((features & NETIF_F_HW_CSUM) && + (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + if (name) + printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", + name); + features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); + } + + if ((features & NETIF_F_NO_CSUM) && + (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { + if (name) + printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", + name); + features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); + } + /* Fix illegal SG+CSUM combinations. */ if ((features & NETIF_F_SG) && !(features & NETIF_F_ALL_CSUM)) { @@ -5390,21 +5407,6 @@ int register_netdevice(struct net_device *dev) if (dev->iflink == -1) dev->iflink = dev->ifindex; - /* Fix illegal checksum combinations */ - if ((dev->features & NETIF_F_HW_CSUM) && - (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", - dev->name); - dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); - } - - if ((dev->features & NETIF_F_NO_CSUM) && - (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", - dev->name); - dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); - } - dev->features = netdev_fix_features(dev->features, dev->name); /* Enable software GSO if SG is supported. 
*/ -- cgit v1.1 From 04ed3e741d0f133e02bed7fa5c98edba128f90e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Mon, 24 Jan 2011 15:32:47 -0800 Subject: net: change netdev->features to u32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Quoting Ben Hutchings: we presumably won't be defining features that can only be enabled on 64-bit architectures. Occurrences found by `grep -r` on net/, drivers/net, include/ [ Move features and vlan_features next to each other in struct netdev, as per Eric Dumazet's suggestion -DaveM ] Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/8021q/vlan.c | 2 +- net/bridge/br_if.c | 2 +- net/bridge/br_private.h | 2 +- net/core/dev.c | 15 +++++++-------- net/core/ethtool.c | 2 +- net/core/net-sysfs.c | 2 +- net/core/skbuff.c | 4 ++-- net/ipv4/af_inet.c | 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/af_inet6.c | 2 +- net/ipv6/udp.c | 2 +- 12 files changed, 19 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 6e64f7c..7850412 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -327,7 +327,7 @@ static void vlan_sync_address(struct net_device *dev, static void vlan_transfer_features(struct net_device *dev, struct net_device *vlandev) { - unsigned long old_features = vlandev->features; + u32 old_features = vlandev->features; vlandev->features &= ~dev->vlan_features; vlandev->features |= dev->features & dev->vlan_features; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index d9d1e2b..52ce4a3 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -365,7 +365,7 @@ int br_min_mtu(const struct net_bridge *br) void br_features_recompute(struct net_bridge *br) { struct net_bridge_port *p; - unsigned long features, mask; + u32 features, mask; features = mask = br->feature_mask; if (list_empty(&br->port_list)) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index
84aac77..9f22898 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -182,7 +182,7 @@ struct net_bridge struct br_cpu_netstats __percpu *stats; spinlock_t hash_lock; struct hlist_head hash[BR_HASH_SIZE]; - unsigned long feature_mask; + u32 feature_mask; #ifdef CONFIG_BRIDGE_NETFILTER struct rtable fake_rtable; bool nf_call_iptables; diff --git a/net/core/dev.c b/net/core/dev.c index ad37418..7103f89 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1858,7 +1858,7 @@ EXPORT_SYMBOL(skb_checksum_help); * It may return NULL if the skb requires no segmentation. This is * only possible when GSO is used for verifying header integrity. */ -struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) +struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_type *ptype; @@ -2046,7 +2046,7 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol) protocol == htons(ETH_P_FCOE))); } -static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features) +static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features) { if (!can_checksum_protocol(features, protocol)) { features &= ~NETIF_F_ALL_CSUM; @@ -2058,10 +2058,10 @@ static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features return features; } -int netif_skb_features(struct sk_buff *skb) +u32 netif_skb_features(struct sk_buff *skb) { __be16 protocol = skb->protocol; - int features = skb->dev->features; + u32 features = skb->dev->features; if (protocol == htons(ETH_P_8021Q)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; @@ -2106,7 +2106,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, int rc = NETDEV_TX_OK; if (likely(!skb->next)) { - int features; + u32 features; /* * If device doesnt need skb->dst, release it right now while @@ -5213,7 +5213,7 @@ static void rollback_registered(struct net_device *dev) 
rollback_registered_many(&single); } -unsigned long netdev_fix_features(unsigned long features, const char *name) +u32 netdev_fix_features(u32 features, const char *name) { /* Fix illegal checksum combinations */ if ((features & NETIF_F_HW_CSUM) && @@ -6143,8 +6143,7 @@ static int dev_cpu_callback(struct notifier_block *nfb, * @one to the master device with current feature set @all. Will not * enable anything that is off in @mask. Returns the new feature set. */ -unsigned long netdev_increment_features(unsigned long all, unsigned long one, - unsigned long mask) +u32 netdev_increment_features(u32 all, u32 one, u32 mask) { /* If device needs checksumming, downgrade to it. */ if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 1774178..bd1af99 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1458,7 +1458,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) void __user *useraddr = ifr->ifr_data; u32 ethcmd; int rc; - unsigned long old_features; + u32 old_features; if (!dev || !netif_device_present(dev)) return -ENODEV; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index e23c01b..81367cc 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -99,7 +99,7 @@ NETDEVICE_SHOW(addr_assign_type, fmt_dec); NETDEVICE_SHOW(addr_len, fmt_dec); NETDEVICE_SHOW(iflink, fmt_dec); NETDEVICE_SHOW(ifindex, fmt_dec); -NETDEVICE_SHOW(features, fmt_long_hex); +NETDEVICE_SHOW(features, fmt_hex); NETDEVICE_SHOW(type, fmt_dec); NETDEVICE_SHOW(link_mode, fmt_dec); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d31bb36..436c4c4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2497,7 +2497,7 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum); * a pointer to the first in a list of new skbs for the segments. * In case of error it returns ERR_PTR(err). 
*/ -struct sk_buff *skb_segment(struct sk_buff *skb, int features) +struct sk_buff *skb_segment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; @@ -2507,7 +2507,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) unsigned int offset = doffset; unsigned int headroom; unsigned int len; - int sg = features & NETIF_F_SG; + int sg = !!(features & NETIF_F_SG); int nfrags = skb_shinfo(skb)->nr_frags; int err = -ENOMEM; int i = 0; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f2b6110..e5e2d9d 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1215,7 +1215,7 @@ out: return err; } -static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) +static struct sk_buff *inet_gso_segment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = ERR_PTR(-EINVAL); struct iphdr *iph; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6c11eec..f9867d2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2653,7 +2653,7 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL(compat_tcp_getsockopt); #endif -struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) +struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = ERR_PTR(-EINVAL); struct tcphdr *th; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8157b17..d37baaa 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2199,7 +2199,7 @@ int udp4_ufo_send_check(struct sk_buff *skb) return 0; } -struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features) +struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = ERR_PTR(-EINVAL); unsigned int mss; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 978e80e..3194aa9 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -772,7 +772,7 @@ out: return err; } -static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features) +static struct sk_buff 
*ipv6_gso_segment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = ERR_PTR(-EINVAL); struct ipv6hdr *ipv6h; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9a009c6..a419a78 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1299,7 +1299,7 @@ static int udp6_ufo_send_check(struct sk_buff *skb) return 0; } -static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, int features) +static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, u32 features) { struct sk_buff *segs = ERR_PTR(-EINVAL); unsigned int mss; -- cgit v1.1 From acd1130e8793fb150fb522da8ec51675839eb4b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Mon, 24 Jan 2011 15:45:15 -0800 Subject: net: reduce and unify printk level in netdev_fix_features() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduce printk() levels to KERN_INFO in netdev_fix_features() as this will be used by ethtool and might spam dmesg unnecessarily. This converts the function to use netdev_info() instead of plain printk(). As a side effect, bonding and bridge devices will now log dropped features on every slave device change. Signed-off-by: Michał Mirosław Signed-off-by: David S. 
Miller --- net/bridge/br_if.c | 2 +- net/core/dev.c | 33 ++++++++++++--------------------- 2 files changed, 13 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 52ce4a3..2a6801d 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -379,7 +379,7 @@ void br_features_recompute(struct net_bridge *br) } done: - br->dev->features = netdev_fix_features(features, NULL); + br->dev->features = netdev_fix_features(br->dev, features); } /* called with RTNL */ diff --git a/net/core/dev.c b/net/core/dev.c index 7103f89..1b4c07f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5213,58 +5213,49 @@ static void rollback_registered(struct net_device *dev) rollback_registered_many(&single); } -u32 netdev_fix_features(u32 features, const char *name) +u32 netdev_fix_features(struct net_device *dev, u32 features) { /* Fix illegal checksum combinations */ if ((features & NETIF_F_HW_CSUM) && (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - if (name) - printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", - name); + netdev_info(dev, "mixed HW and IP checksum settings.\n"); features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); } if ((features & NETIF_F_NO_CSUM) && (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - if (name) - printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", - name); + netdev_info(dev, "mixed no checksumming and other settings.\n"); features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); } /* Fix illegal SG+CSUM combinations. */ if ((features & NETIF_F_SG) && !(features & NETIF_F_ALL_CSUM)) { - if (name) - printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no " - "checksum feature.\n", name); + netdev_info(dev, + "Dropping NETIF_F_SG since no checksum feature.\n"); features &= ~NETIF_F_SG; } /* TSO requires that SG is present as well. 
*/ if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) { - if (name) - printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no " - "SG feature.\n", name); + netdev_info(dev, "Dropping NETIF_F_TSO since no SG feature.\n"); features &= ~NETIF_F_TSO; } + /* UFO needs SG and checksumming */ if (features & NETIF_F_UFO) { /* maybe split UFO into V4 and V6? */ if (!((features & NETIF_F_GEN_CSUM) || (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { - if (name) - printk(KERN_ERR "%s: Dropping NETIF_F_UFO " - "since no checksum offload features.\n", - name); + netdev_info(dev, + "Dropping NETIF_F_UFO since no checksum offload features.\n"); features &= ~NETIF_F_UFO; } if (!(features & NETIF_F_SG)) { - if (name) - printk(KERN_ERR "%s: Dropping NETIF_F_UFO " - "since no NETIF_F_SG feature.\n", name); + netdev_info(dev, + "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n"); features &= ~NETIF_F_UFO; } } @@ -5407,7 +5398,7 @@ int register_netdevice(struct net_device *dev) if (dev->iflink == -1) dev->iflink = dev->ifindex; - dev->features = netdev_fix_features(dev->features, dev->name); + dev->features = netdev_fix_features(dev, dev->features); /* Enable software GSO if SG is supported. */ if (dev->features & NETIF_F_SG) -- cgit v1.1 From 07924709f68b3f4f701d4efd6acd18ca4ee14de3 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 24 Jan 2011 15:14:41 +0100 Subject: IPVS netns BUG, register sysctl for root ns The newly created table was not used when register sysctl for a new namespace. I.e. 
sysctl doesn't work for other than root namespace (init_net) Signed-off-by: Hans Schillstrom Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 68b8033..98df59a 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3556,7 +3556,7 @@ int __net_init __ip_vs_control_init(struct net *net) ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path, - vs_vars); + tbl); if (ipvs->sysctl_hdr == NULL) goto err_reg; ip_vs_new_estimator(net, ipvs->tot_stats); -- cgit v1.1 From a512b92b3af4b03fc6834617a042dc85fbd4e34e Mon Sep 17 00:00:00 2001 From: Vlad Dogaru Date: Mon, 24 Jan 2011 03:37:29 +0000 Subject: net: add sysfs entry for device group The group of a network device can be queried or changed from userspace using sysfs. For example, considering sysfs mounted in /sys, one can change the group that interface lo belongs to: echo 1 > /sys/class/net/lo/group Signed-off-by: Vlad Dogaru Acked-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/core/net-sysfs.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'net') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 81367cc..2e4a393 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -295,6 +295,20 @@ static ssize_t show_ifalias(struct device *dev, return ret; } +NETDEVICE_SHOW(group, fmt_dec); + +static int change_group(struct net_device *net, unsigned long new_group) +{ + dev_set_group(net, (int) new_group); + return 0; +} + +static ssize_t store_group(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + return netdev_store(dev, attr, buf, len, change_group); +} + static struct device_attribute net_class_attributes[] = { __ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL), __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), @@ -316,6 +330,7 @@ static struct device_attribute net_class_attributes[] = { __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags), __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, store_tx_queue_len), + __ATTR(group, S_IRUGO | S_IWUSR, show_group, store_group), {} }; -- cgit v1.1 From 9f4e1ccd80530609bbceec68ae3831697b5c6a68 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Tue, 25 Jan 2011 12:40:18 +0800 Subject: netfilter: ipvs: fix compiler warnings Fix compiler warnings when IP_VS_DBG() isn't defined. 
Signed-off-by: Changli Gao Acked-by: Hans Schillstrom Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index f36a84f..d889f4f 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1894,9 +1894,7 @@ static int __net_init __ip_vs_init(struct net *net) static void __net_exit __ip_vs_cleanup(struct net *net) { - struct netns_ipvs *ipvs = net_ipvs(net); - - IP_VS_DBG(10, "ipvs netns %d released\n", ipvs->gen); + IP_VS_DBG(10, "ipvs netns %d released\n", net_ipvs(net)->gen); } static struct pernet_operations ipvs_core_ops = { -- cgit v1.1 From 26ad787962ef84677a48c56039d3c9769b84f847 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Jan 2011 13:26:05 -0800 Subject: pktgen: speedup fragmented skbs We spend lot of time clearing pages in pktgen. (Or not clearing them on ipv6 and leaking kernel memory) Since we dont modify them, we can use one zeroed page, and get references on it. This page can use NUMA affinity as well. Define pktgen_finalize_skb() helper, used both in ipv4 and ipv6 Results using skbs with one frag : Before patch : Result: OK: 608980458(c608978520+d1938) nsec, 1000000000 (100byte,1frags) 1642088pps 1313Mb/sec (1313670400bps) errors: 0 After patch : Result: OK: 345285014(c345283891+d1123) nsec, 1000000000 (100byte,1frags) 2896158pps 2316Mb/sec (2316926400bps) errors: 0 Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/core/pktgen.c | 234 ++++++++++++++++++++++-------------------------------- 1 file changed, 93 insertions(+), 141 deletions(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index a9e7fc4..d73b77a 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -251,6 +251,7 @@ struct pktgen_dev { int max_pkt_size; /* = ETH_ZLEN; */ int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */ int nfrags; + struct page *page; u64 delay; /* nano-seconds */ __u64 count; /* Default No packets to send */ @@ -1134,6 +1135,10 @@ static ssize_t pktgen_if_write(struct file *file, if (node_possible(value)) { pkt_dev->node = value; sprintf(pg_result, "OK: node=%d", pkt_dev->node); + if (pkt_dev->page) { + put_page(pkt_dev->page); + pkt_dev->page = NULL; + } } else sprintf(pg_result, "ERROR: node not possible"); @@ -2605,6 +2610,90 @@ static inline __be16 build_tci(unsigned int id, unsigned int cfi, return htons(id | (cfi << 12) | (prio << 13)); } +static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, + int datalen) +{ + struct timeval timestamp; + struct pktgen_hdr *pgh; + + pgh = (struct pktgen_hdr *)skb_put(skb, sizeof(*pgh)); + datalen -= sizeof(*pgh); + + if (pkt_dev->nfrags <= 0) { + pgh = (struct pktgen_hdr *)skb_put(skb, datalen); + memset(pgh + 1, 0, datalen); + } else { + int frags = pkt_dev->nfrags; + int i, len; + + + if (frags > MAX_SKB_FRAGS) + frags = MAX_SKB_FRAGS; + len = datalen - frags * PAGE_SIZE; + if (len > 0) { + memset(skb_put(skb, len), 0, len); + datalen = frags * PAGE_SIZE; + } + + i = 0; + while (datalen > 0) { + if (unlikely(!pkt_dev->page)) { + int node = numa_node_id(); + + if (pkt_dev->node >= 0 && (pkt_dev->flags & F_NODE)) + node = pkt_dev->node; + pkt_dev->page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); + if (!pkt_dev->page) + break; + } + skb_shinfo(skb)->frags[i].page = pkt_dev->page; + get_page(pkt_dev->page); + skb_shinfo(skb)->frags[i].page_offset = 0; + 
skb_shinfo(skb)->frags[i].size = + (datalen < PAGE_SIZE ? datalen : PAGE_SIZE); + datalen -= skb_shinfo(skb)->frags[i].size; + skb->len += skb_shinfo(skb)->frags[i].size; + skb->data_len += skb_shinfo(skb)->frags[i].size; + i++; + skb_shinfo(skb)->nr_frags = i; + } + + while (i < frags) { + int rem; + + if (i == 0) + break; + + rem = skb_shinfo(skb)->frags[i - 1].size / 2; + if (rem == 0) + break; + + skb_shinfo(skb)->frags[i - 1].size -= rem; + + skb_shinfo(skb)->frags[i] = + skb_shinfo(skb)->frags[i - 1]; + get_page(skb_shinfo(skb)->frags[i].page); + skb_shinfo(skb)->frags[i].page = + skb_shinfo(skb)->frags[i - 1].page; + skb_shinfo(skb)->frags[i].page_offset += + skb_shinfo(skb)->frags[i - 1].size; + skb_shinfo(skb)->frags[i].size = rem; + i++; + skb_shinfo(skb)->nr_frags = i; + } + } + + /* Stamp the time, and sequence number, + * convert them to network byte order + */ + pgh->pgh_magic = htonl(PKTGEN_MAGIC); + pgh->seq_num = htonl(pkt_dev->seq_num); + + do_gettimeofday(&timestamp); + pgh->tv_sec = htonl(timestamp.tv_sec); + pgh->tv_usec = htonl(timestamp.tv_usec); +} + static struct sk_buff *fill_packet_ipv4(struct net_device *odev, struct pktgen_dev *pkt_dev) { @@ -2613,7 +2702,6 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, struct udphdr *udph; int datalen, iplen; struct iphdr *iph; - struct pktgen_hdr *pgh = NULL; __be16 protocol = htons(ETH_P_IP); __be32 *mpls; __be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */ @@ -2729,76 +2817,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, pkt_dev->pkt_overhead); skb->dev = odev; skb->pkt_type = PACKET_HOST; - - if (pkt_dev->nfrags <= 0) { - pgh = (struct pktgen_hdr *)skb_put(skb, datalen); - memset(pgh + 1, 0, datalen - sizeof(struct pktgen_hdr)); - } else { - int frags = pkt_dev->nfrags; - int i, len; - - pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8); - - if (frags > MAX_SKB_FRAGS) - frags = MAX_SKB_FRAGS; - if (datalen > frags * PAGE_SIZE) { - len = datalen - 
frags * PAGE_SIZE; - memset(skb_put(skb, len), 0, len); - datalen = frags * PAGE_SIZE; - } - - i = 0; - while (datalen > 0) { - struct page *page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); - skb_shinfo(skb)->frags[i].page = page; - skb_shinfo(skb)->frags[i].page_offset = 0; - skb_shinfo(skb)->frags[i].size = - (datalen < PAGE_SIZE ? datalen : PAGE_SIZE); - datalen -= skb_shinfo(skb)->frags[i].size; - skb->len += skb_shinfo(skb)->frags[i].size; - skb->data_len += skb_shinfo(skb)->frags[i].size; - i++; - skb_shinfo(skb)->nr_frags = i; - } - - while (i < frags) { - int rem; - - if (i == 0) - break; - - rem = skb_shinfo(skb)->frags[i - 1].size / 2; - if (rem == 0) - break; - - skb_shinfo(skb)->frags[i - 1].size -= rem; - - skb_shinfo(skb)->frags[i] = - skb_shinfo(skb)->frags[i - 1]; - get_page(skb_shinfo(skb)->frags[i].page); - skb_shinfo(skb)->frags[i].page = - skb_shinfo(skb)->frags[i - 1].page; - skb_shinfo(skb)->frags[i].page_offset += - skb_shinfo(skb)->frags[i - 1].size; - skb_shinfo(skb)->frags[i].size = rem; - i++; - skb_shinfo(skb)->nr_frags = i; - } - } - - /* Stamp the time, and sequence number, - * convert them to network byte order - */ - if (pgh) { - struct timeval timestamp; - - pgh->pgh_magic = htonl(PKTGEN_MAGIC); - pgh->seq_num = htonl(pkt_dev->seq_num); - - do_gettimeofday(&timestamp); - pgh->tv_sec = htonl(timestamp.tv_sec); - pgh->tv_usec = htonl(timestamp.tv_usec); - } + pktgen_finalize_skb(pkt_dev, skb, datalen); #ifdef CONFIG_XFRM if (!process_ipsec(pkt_dev, skb, protocol)) @@ -2980,7 +2999,6 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, struct udphdr *udph; int datalen; struct ipv6hdr *iph; - struct pktgen_hdr *pgh = NULL; __be16 protocol = htons(ETH_P_IPV6); __be32 *mpls; __be16 *vlan_tci = NULL; /* Encapsulates priority and VLAN ID */ @@ -3083,75 +3101,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, skb->dev = odev; skb->pkt_type = PACKET_HOST; - if (pkt_dev->nfrags <= 0) - pgh = (struct pktgen_hdr 
*)skb_put(skb, datalen); - else { - int frags = pkt_dev->nfrags; - int i; - - pgh = (struct pktgen_hdr *)(((char *)(udph)) + 8); - - if (frags > MAX_SKB_FRAGS) - frags = MAX_SKB_FRAGS; - if (datalen > frags * PAGE_SIZE) { - skb_put(skb, datalen - frags * PAGE_SIZE); - datalen = frags * PAGE_SIZE; - } - - i = 0; - while (datalen > 0) { - struct page *page = alloc_pages(GFP_KERNEL, 0); - skb_shinfo(skb)->frags[i].page = page; - skb_shinfo(skb)->frags[i].page_offset = 0; - skb_shinfo(skb)->frags[i].size = - (datalen < PAGE_SIZE ? datalen : PAGE_SIZE); - datalen -= skb_shinfo(skb)->frags[i].size; - skb->len += skb_shinfo(skb)->frags[i].size; - skb->data_len += skb_shinfo(skb)->frags[i].size; - i++; - skb_shinfo(skb)->nr_frags = i; - } - - while (i < frags) { - int rem; - - if (i == 0) - break; - - rem = skb_shinfo(skb)->frags[i - 1].size / 2; - if (rem == 0) - break; - - skb_shinfo(skb)->frags[i - 1].size -= rem; - - skb_shinfo(skb)->frags[i] = - skb_shinfo(skb)->frags[i - 1]; - get_page(skb_shinfo(skb)->frags[i].page); - skb_shinfo(skb)->frags[i].page = - skb_shinfo(skb)->frags[i - 1].page; - skb_shinfo(skb)->frags[i].page_offset += - skb_shinfo(skb)->frags[i - 1].size; - skb_shinfo(skb)->frags[i].size = rem; - i++; - skb_shinfo(skb)->nr_frags = i; - } - } - - /* Stamp the time, and sequence number, - * convert them to network byte order - * should we update cloned packets too ? - */ - if (pgh) { - struct timeval timestamp; - - pgh->pgh_magic = htonl(PKTGEN_MAGIC); - pgh->seq_num = htonl(pkt_dev->seq_num); - - do_gettimeofday(&timestamp); - pgh->tv_sec = htonl(timestamp.tv_sec); - pgh->tv_usec = htonl(timestamp.tv_usec); - } - /* pkt_dev->seq_num++; FF: you really mean this? 
*/ + pktgen_finalize_skb(pkt_dev, skb, datalen); return skb; } @@ -3884,6 +3834,8 @@ static int pktgen_remove_device(struct pktgen_thread *t, free_SAs(pkt_dev); #endif vfree(pkt_dev->flows); + if (pkt_dev->page) + put_page(pkt_dev->page); kfree(pkt_dev); return 0; } -- cgit v1.1 From ad86e1f27a9a97a9e50810b10bca678407b1d6fd Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 26 Jan 2011 11:50:03 +0100 Subject: netfilter: xt_connlimit: pick right dstaddr in NAT scenario MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xt_connlimit normally records the "original" tuples in a hashlist (such as "1.2.3.4 -> 5.6.7.8"), and looks in this list for iph->daddr when counting. When the user however uses DNAT in PREROUTING, looking for iph->daddr -- which is now 192.168.9.10 -- will not match. Thus in daddr mode, we need to record the reverse direction tuple ("192.168.9.10 -> 1.2.3.4") instead. In the reverse tuple, the dst addr is on the src side, which is convenient, as count_them still uses &conn->tuple.src.u3. 
Signed-off-by: Jan Engelhardt --- net/netfilter/xt_connlimit.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 7fd3fd5..e029c48 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -185,11 +185,15 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) int connections; ct = nf_ct_get(skb, &ctinfo); - if (ct != NULL) - tuple_ptr = &ct->tuplehash[0].tuple; - else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), - par->family, &tuple)) + if (ct != NULL) { + if (info->flags & XT_CONNLIMIT_DADDR) + tuple_ptr = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; + else + tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + } else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), + par->family, &tuple)) { goto hotdrop; + } if (par->family == NFPROTO_IPV6) { const struct ipv6hdr *iph = ipv6_hdr(skb); -- cgit v1.1 From 144ce879b057c760194d808c90826cd96308f423 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 26 Jan 2011 07:21:57 +0000 Subject: net_sched: sch_mqprio: dont leak kernel memory mqprio_dump() should make sure all fields of struct tc_mqprio_qopt are initialized. Signed-off-by: Eric Dumazet CC: John Fastabend Signed-off-by: David S. 
Miller --- net/sched/sch_mqprio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index fbc6f53..effd4ee 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -215,7 +215,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) struct net_device *dev = qdisc_dev(sch); struct mqprio_sched *priv = qdisc_priv(sch); unsigned char *b = skb_tail_pointer(skb); - struct tc_mqprio_qopt opt; + struct tc_mqprio_qopt opt = { 0 }; struct Qdisc *qdisc; unsigned int i; -- cgit v1.1 From ba99d93b3d7bb3a6406bc86f41ab863895968a0f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 26 Jan 2011 09:22:15 +0100 Subject: mac80211: use DECLARE_EVENT_CLASS For events that include only the local struct as their parameter, we can use DECLARE_EVENT_CLASS and save quite some binary size across segments as well lines of code. text data bss dec hex filename 375745 19296 916 395957 60ab5 mac80211.ko.before 367473 17888 916 386277 5e4e5 mac80211.ko.after -8272 -1408 0 -9680 -25d0 delta Some more tracepoints with identical arguments could be combined like this but for now this is the one that benefits most. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/driver-trace.h | 202 ++++++++------------------------------------ 1 file changed, 33 insertions(+), 169 deletions(-) (limited to 'net') diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index fbabbc2..e5cce19 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -9,6 +9,11 @@ #undef TRACE_EVENT #define TRACE_EVENT(name, proto, ...) \ static inline void trace_ ## name(proto) {} +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(...) +#undef DEFINE_EVENT +#define DEFINE_EVENT(evt_class, name, proto, ...) 
\ +static inline void trace_ ## name(proto) {} #endif #undef TRACE_SYSTEM @@ -38,7 +43,7 @@ static inline void trace_ ## name(proto) {} * Tracing for driver callbacks. */ -TRACE_EVENT(drv_return_void, +DECLARE_EVENT_CLASS(local_only_evt, TP_PROTO(struct ieee80211_local *local), TP_ARGS(local), TP_STRUCT__entry( @@ -50,6 +55,11 @@ TRACE_EVENT(drv_return_void, TP_printk(LOCAL_PR_FMT, LOCAL_PR_ARG) ); +DEFINE_EVENT(local_only_evt, drv_return_void, + TP_PROTO(struct ieee80211_local *local), + TP_ARGS(local) +); + TRACE_EVENT(drv_return_int, TP_PROTO(struct ieee80211_local *local, int ret), TP_ARGS(local, ret), @@ -78,40 +88,14 @@ TRACE_EVENT(drv_return_u64, TP_printk(LOCAL_PR_FMT " - %llu", LOCAL_PR_ARG, __entry->ret) ); -TRACE_EVENT(drv_start, +DEFINE_EVENT(local_only_evt, drv_start, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG - ) + TP_ARGS(local) ); -TRACE_EVENT(drv_stop, +DEFINE_EVENT(local_only_evt, drv_stop, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG - ) + TP_ARGS(local) ); TRACE_EVENT(drv_add_interface, @@ -439,40 +423,14 @@ TRACE_EVENT(drv_hw_scan, ) ); -TRACE_EVENT(drv_sw_scan_start, +DEFINE_EVENT(local_only_evt, drv_sw_scan_start, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG - ) + TP_ARGS(local) ); -TRACE_EVENT(drv_sw_scan_complete, +DEFINE_EVENT(local_only_evt, drv_sw_scan_complete, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG - ) + TP_ARGS(local) ); TRACE_EVENT(drv_get_stats, @@ -702,23 +660,9 @@ 
TRACE_EVENT(drv_conf_tx, ) ); -TRACE_EVENT(drv_get_tsf, +DEFINE_EVENT(local_only_evt, drv_get_tsf, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, - LOCAL_PR_ARG - ) + TP_ARGS(local) ); TRACE_EVENT(drv_set_tsf, @@ -742,41 +686,14 @@ TRACE_EVENT(drv_set_tsf, ) ); -TRACE_EVENT(drv_reset_tsf, +DEFINE_EVENT(local_only_evt, drv_reset_tsf, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG - ) + TP_ARGS(local) ); -TRACE_EVENT(drv_tx_last_beacon, +DEFINE_EVENT(local_only_evt, drv_tx_last_beacon, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, - LOCAL_PR_ARG - ) + TP_ARGS(local) ); TRACE_EVENT(drv_ampdu_action, @@ -962,22 +879,9 @@ TRACE_EVENT(drv_remain_on_channel, ) ); -TRACE_EVENT(drv_cancel_remain_on_channel, +DEFINE_EVENT(local_only_evt, drv_cancel_remain_on_channel, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG - ) + TP_ARGS(local) ); /* @@ -1072,23 +976,9 @@ TRACE_EVENT(api_stop_tx_ba_cb, ) ); -TRACE_EVENT(api_restart_hw, +DEFINE_EVENT(local_only_evt, api_restart_hw, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, - LOCAL_PR_ARG - ) + TP_ARGS(local) ); TRACE_EVENT(api_beacon_loss, @@ -1217,40 +1107,14 @@ TRACE_EVENT(api_chswitch_done, ) ); -TRACE_EVENT(api_ready_on_channel, +DEFINE_EVENT(local_only_evt, api_ready_on_channel, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - 
), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG - ) + TP_ARGS(local) ); -TRACE_EVENT(api_remain_on_channel_expired, +DEFINE_EVENT(local_only_evt, api_remain_on_channel_expired, TP_PROTO(struct ieee80211_local *local), - - TP_ARGS(local), - - TP_STRUCT__entry( - LOCAL_ENTRY - ), - - TP_fast_assign( - LOCAL_ASSIGN; - ), - - TP_printk( - LOCAL_PR_FMT, LOCAL_PR_ARG - ) + TP_ARGS(local) ); /* -- cgit v1.1 From 62fa8a846d7de4b299232e330c74b7783539df76 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 26 Jan 2011 20:51:05 -0800 Subject: net: Implement read-only protection and COW'ing of metrics. Routing metrics are now copy-on-write. Initially a route entry points it's metrics at a read-only location. If a routing table entry exists, it will point there. Else it will point at the all zero metric place-holder called 'dst_default_metrics'. The writeability state of the metrics is stored in the low bits of the metrics pointer, we have two bits left to spare if we want to store more states. For the initial implementation, COW is implemented simply via kmalloc. However future enhancements will change this to place the writable metrics somewhere else, in order to increase sharing. Very likely this "somewhere else" will be the inetpeer cache. Note also that this means that metrics updates may transiently fail if we cannot COW the metrics successfully. But even by itself, this patch should decrease memory usage and increase cache locality especially for routing workloads. In those cases the read-only metric copies stay in place and never get written to. TCP workloads where metrics get updated, and those rare cases where PMTU triggers occur, will take a very slight performance hit. But that hit will be alleviated when the long-term writable metrics move to a more sharable location. 
Since the metrics storage went from a u32 array of RTAX_MAX entries to what is essentially a pointer, some retooling of the dst_entry layout was necessary. Most importantly, we need to preserve the alignment of the reference count so that it doesn't share cache lines with the read-mostly state, as per Eric Dumazet's alignment assertion checks. The only non-trivial bit here is the move of the 'flags' member into the writeable cacheline. This is OK since we are always accessing the flags around the same moment when we made a modification to the reference count. Signed-off-by: David S. Miller --- net/core/dst.c | 39 +++++++++++++++++++++++++++++++++++++++ net/decnet/dn_route.c | 18 +++++++++++++----- net/ipv4/route.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- net/ipv4/xfrm4_policy.c | 4 ++++ net/ipv6/route.c | 15 ++++++++++++--- net/ipv6/xfrm6_policy.c | 2 ++ 6 files changed, 114 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/core/dst.c b/net/core/dst.c index b99c7c7..5788935 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -164,6 +164,8 @@ int dst_discard(struct sk_buff *skb) } EXPORT_SYMBOL(dst_discard); +static const u32 dst_default_metrics[RTAX_MAX]; + void *dst_alloc(struct dst_ops *ops) { struct dst_entry *dst; @@ -180,6 +182,7 @@ void *dst_alloc(struct dst_ops *ops) dst->lastuse = jiffies; dst->path = dst; dst->input = dst->output = dst_discard; + dst_init_metrics(dst, dst_default_metrics, true); #if RT_CACHE_DEBUG >= 2 atomic_inc(&dst_total); #endif @@ -282,6 +285,42 @@ void dst_release(struct dst_entry *dst) } EXPORT_SYMBOL(dst_release); +u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old) +{ + u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC); + + if (p) { + u32 *old_p = __DST_METRICS_PTR(old); + unsigned long prev, new; + + memcpy(p, old_p, sizeof(u32) * RTAX_MAX); + + new = (unsigned long) p; + prev = cmpxchg(&dst->_metrics, old, new); + + if (prev != old) { + kfree(p); + p = __DST_METRICS_PTR(prev); + if 
(prev & DST_METRICS_READ_ONLY) + p = NULL; + } + } + return p; +} +EXPORT_SYMBOL(dst_cow_metrics_generic); + +/* Caller asserts that dst_metrics_read_only(dst) is false. */ +void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old) +{ + unsigned long prev, new; + + new = (unsigned long) dst_default_metrics; + prev = cmpxchg(&dst->_metrics, old, new); + if (prev == old) + kfree(__DST_METRICS_PTR(old)); +} +EXPORT_SYMBOL(__dst_destroy_metrics_generic); + /** * skb_dst_set_noref - sets skb dst, without a reference * @skb: buffer diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 5e63636..42c9c62 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -112,6 +112,7 @@ static int dn_dst_gc(struct dst_ops *ops); static struct dst_entry *dn_dst_check(struct dst_entry *, __u32); static unsigned int dn_dst_default_advmss(const struct dst_entry *dst); static unsigned int dn_dst_default_mtu(const struct dst_entry *dst); +static void dn_dst_destroy(struct dst_entry *); static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); static void dn_dst_link_failure(struct sk_buff *); static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu); @@ -133,11 +134,18 @@ static struct dst_ops dn_dst_ops = { .check = dn_dst_check, .default_advmss = dn_dst_default_advmss, .default_mtu = dn_dst_default_mtu, + .cow_metrics = dst_cow_metrics_generic, + .destroy = dn_dst_destroy, .negative_advice = dn_dst_negative_advice, .link_failure = dn_dst_link_failure, .update_pmtu = dn_dst_update_pmtu, }; +static void dn_dst_destroy(struct dst_entry *dst) +{ + dst_destroy_metrics_generic(dst); +} + static __inline__ unsigned dn_hash(__le16 src, __le16 dst) { __u16 tmp = (__u16 __force)(src ^ dst); @@ -814,14 +822,14 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) { struct dn_fib_info *fi = res->fi; struct net_device *dev = rt->dst.dev; + unsigned int mss_metric; struct neighbour *n; - unsigned int metric; if (fi) { if 
(DN_FIB_RES_GW(*res) && DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = DN_FIB_RES_GW(*res); - dst_import_metrics(&rt->dst, fi->fib_metrics); + dst_init_metrics(&rt->dst, fi->fib_metrics, true); } rt->rt_type = res->type; @@ -834,10 +842,10 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu); - metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS); - if (metric) { + mss_metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS); + if (mss_metric) { unsigned int mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst)); - if (metric > mss) + if (mss_metric > mss) dst_metric_set(&rt->dst, RTAX_ADVMSS, mss); } return 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3e5b7cc..980030d4 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -152,6 +152,36 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, { } +static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) +{ + u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC); + + if (p) { + u32 *old_p = __DST_METRICS_PTR(old); + unsigned long prev, new; + + memcpy(p, old_p, sizeof(u32) * RTAX_MAX); + + new = (unsigned long) p; + prev = cmpxchg(&dst->_metrics, old, new); + + if (prev != old) { + kfree(p); + p = __DST_METRICS_PTR(prev); + if (prev & DST_METRICS_READ_ONLY) + p = NULL; + } else { + struct rtable *rt = (struct rtable *) dst; + + if (rt->fi) { + fib_info_put(rt->fi); + rt->fi = NULL; + } + } + } + return p; +} + static struct dst_ops ipv4_dst_ops = { .family = AF_INET, .protocol = cpu_to_be16(ETH_P_IP), @@ -159,6 +189,7 @@ static struct dst_ops ipv4_dst_ops = { .check = ipv4_dst_check, .default_advmss = ipv4_default_advmss, .default_mtu = ipv4_default_mtu, + .cow_metrics = ipv4_cow_metrics, .destroy = ipv4_dst_destroy, .ifdown = ipv4_dst_ifdown, .negative_advice = ipv4_negative_advice, @@ -1441,6 +1472,8 @@ void ip_rt_redirect(__be32 old_gw, 
__be32 daddr, __be32 new_gw, if (rt->peer) atomic_inc(&rt->peer->refcnt); + if (rt->fi) + atomic_inc(&rt->fi->fib_clntref); if (arp_bind_neighbour(&rt->dst) || !(rt->dst.neighbour->nud_state & @@ -1720,6 +1753,11 @@ static void ipv4_dst_destroy(struct dst_entry *dst) struct rtable *rt = (struct rtable *) dst; struct inet_peer *peer = rt->peer; + dst_destroy_metrics_generic(dst); + if (rt->fi) { + fib_info_put(rt->fi); + rt->fi = NULL; + } if (peer) { rt->peer = NULL; inet_putpeer(peer); @@ -1824,7 +1862,9 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = FIB_RES_GW(*res); - dst_import_metrics(dst, fi->fib_metrics); + rt->fi = fi; + atomic_inc(&fi->fib_clntref); + dst_init_metrics(dst, fi->fib_metrics, true); #ifdef CONFIG_IP_ROUTE_CLASSID dst->tclassid = FIB_RES_NH(*res).nh_tclassid; #endif @@ -2752,6 +2792,9 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi rt->peer = ort->peer; if (rt->peer) atomic_inc(&rt->peer->refcnt); + rt->fi = ort->fi; + if (rt->fi) + atomic_inc(&rt->fi->fib_clntref); dst_free(new); } diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index b057d40..19fbdec 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -196,8 +196,11 @@ static void xfrm4_dst_destroy(struct dst_entry *dst) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + dst_destroy_metrics_generic(dst); + if (likely(xdst->u.rt.peer)) inet_putpeer(xdst->u.rt.peer); + xfrm_dst_destroy(xdst); } @@ -215,6 +218,7 @@ static struct dst_ops xfrm4_dst_ops = { .protocol = cpu_to_be16(ETH_P_IP), .gc = xfrm4_garbage_collect, .update_pmtu = xfrm4_update_pmtu, + .cow_metrics = dst_cow_metrics_generic, .destroy = xfrm4_dst_destroy, .ifdown = xfrm4_dst_ifdown, .local_out = __ip_local_out, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1534508..45fafa0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ 
-105,6 +105,7 @@ static struct dst_ops ip6_dst_ops_template = { .check = ip6_dst_check, .default_advmss = ip6_default_advmss, .default_mtu = ip6_default_mtu, + .cow_metrics = dst_cow_metrics_generic, .destroy = ip6_dst_destroy, .ifdown = ip6_dst_ifdown, .negative_advice = ip6_negative_advice, @@ -125,6 +126,10 @@ static struct dst_ops ip6_dst_blackhole_ops = { .update_pmtu = ip6_rt_blackhole_update_pmtu, }; +static const u32 ip6_template_metrics[RTAX_MAX] = { + [RTAX_HOPLIMIT - 1] = 255, +}; + static struct rt6_info ip6_null_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -193,6 +198,7 @@ static void ip6_dst_destroy(struct dst_entry *dst) rt->rt6i_idev = NULL; in6_dev_put(idev); } + dst_destroy_metrics_generic(dst); if (peer) { BUG_ON(!(rt->rt6i_flags & RTF_CACHE)); rt->rt6i_peer = NULL; @@ -2681,7 +2687,8 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_null_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_null_entry; net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; - dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255); + dst_init_metrics(&net->ipv6.ip6_null_entry->dst, + ip6_template_metrics, true); #ifdef CONFIG_IPV6_MULTIPLE_TABLES net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, @@ -2692,7 +2699,8 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_prohibit_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_prohibit_entry; net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; - dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255); + dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst, + ip6_template_metrics, true); net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, sizeof(*net->ipv6.ip6_blk_hole_entry), @@ -2702,7 +2710,8 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_blk_hole_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; 
net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; - dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255); + dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, + ip6_template_metrics, true); #endif net->ipv6.sysctl.flush_delay = 0; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index da87428..834dc02 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -220,6 +220,7 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) if (likely(xdst->u.rt6.rt6i_idev)) in6_dev_put(xdst->u.rt6.rt6i_idev); + dst_destroy_metrics_generic(dst); if (likely(xdst->u.rt6.rt6i_peer)) inet_putpeer(xdst->u.rt6.rt6i_peer); xfrm_dst_destroy(xdst); @@ -257,6 +258,7 @@ static struct dst_ops xfrm6_dst_ops = { .protocol = cpu_to_be16(ETH_P_IPV6), .gc = xfrm6_garbage_collect, .update_pmtu = xfrm6_update_pmtu, + .cow_metrics = dst_cow_metrics_generic, .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, -- cgit v1.1 From 705ca147176090203afd7503392e6e770637499b Mon Sep 17 00:00:00 2001 From: Thomas Jacob Date: Thu, 27 Jan 2011 10:56:32 +0100 Subject: netfilter: xt_iprange: typo in IPv4 match debug print code Signed-off-by: Thomas Jacob Signed-off-by: Patrick McHardy --- net/netfilter/xt_iprange.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c index 88f7c35..77b9ebc 100644 --- a/net/netfilter/xt_iprange.c +++ b/net/netfilter/xt_iprange.c @@ -31,7 +31,7 @@ iprange_mt4(const struct sk_buff *skb, struct xt_action_param *par) pr_debug("src IP %pI4 NOT in range %s%pI4-%pI4\n", &iph->saddr, (info->flags & IPRANGE_SRC_INV) ? "(INV) " : "", - &info->src_max.ip, + &info->src_min.ip, &info->src_max.ip); return false; } -- cgit v1.1 From 144001bddcb4db62c2261f1d703d835851031577 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Thu, 27 Jan 2011 13:52:16 -0800 Subject: inetpeer: Mark metrics as "new" in fresh inetpeer entries. Set the RTAX_LOCK metric to INETPEER_METRICS_NEW (basically, all ones) on fresh inetpeer entries. This way code can determine if default metrics have been loaded in from a routing table entry already. Signed-off-by: David S. Miller --- net/ipv4/inetpeer.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index a96e656..b6513b1 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -512,6 +512,7 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) atomic_set(&p->rid, 0); atomic_set(&p->ip_id_count, secure_ip_id(daddr->a4)); p->tcp_ts_stamp = 0; + p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; INIT_LIST_HEAD(&p->unused); -- cgit v1.1 From 065825402c058f4a123ddc53dbbe864cc5caaf64 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 27 Jan 2011 14:58:42 -0800 Subject: net: Store ipv4/ipv6 COW'd metrics in inetpeer cache. Please note that the IPSEC dst entry metrics keep using the generic metrics COW'ing mechanism using kmalloc/kfree. This gives the IPSEC routes an opportunity to use metrics which are unique to their encapsulated paths. Signed-off-by: David S.
Miller --- net/ipv4/route.c | 18 +++++++++++------- net/ipv6/route.c | 33 +++++++++++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 980030d4..68cee35 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -154,25 +154,30 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) { - u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC); + struct rtable *rt = (struct rtable *) dst; + struct inet_peer *peer; + u32 *p = NULL; + + if (!rt->peer) + rt_bind_peer(rt, 1); - if (p) { + peer = rt->peer; + if (peer) { u32 *old_p = __DST_METRICS_PTR(old); unsigned long prev, new; - memcpy(p, old_p, sizeof(u32) * RTAX_MAX); + p = peer->metrics; + if (inet_metrics_new(peer)) + memcpy(p, old_p, sizeof(u32) * RTAX_MAX); new = (unsigned long) p; prev = cmpxchg(&dst->_metrics, old, new); if (prev != old) { - kfree(p); p = __DST_METRICS_PTR(prev); if (prev & DST_METRICS_READ_ONLY) p = NULL; } else { - struct rtable *rt = (struct rtable *) dst; - if (rt->fi) { fib_info_put(rt->fi); rt->fi = NULL; @@ -1753,7 +1758,6 @@ static void ipv4_dst_destroy(struct dst_entry *dst) struct rtable *rt = (struct rtable *) dst; struct inet_peer *peer = rt->peer; - dst_destroy_metrics_generic(dst); if (rt->fi) { fib_info_put(rt->fi); rt->fi = NULL; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 691798c..72609f1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -97,6 +97,36 @@ static struct rt6_info *rt6_get_route_info(struct net *net, struct in6_addr *gwaddr, int ifindex); #endif +static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) +{ + struct rt6_info *rt = (struct rt6_info *) dst; + struct inet_peer *peer; + u32 *p = NULL; + + if (!rt->rt6i_peer) + rt6_bind_peer(rt, 1); + + peer = rt->rt6i_peer; + if (peer) { + u32 *old_p = __DST_METRICS_PTR(old); + unsigned long prev, new; + + p = 
peer->metrics; + if (inet_metrics_new(peer)) + memcpy(p, old_p, sizeof(u32) * RTAX_MAX); + + new = (unsigned long) p; + prev = cmpxchg(&dst->_metrics, old, new); + + if (prev != old) { + p = __DST_METRICS_PTR(prev); + if (prev & DST_METRICS_READ_ONLY) + p = NULL; + } + } + return p; +} + static struct dst_ops ip6_dst_ops_template = { .family = AF_INET6, .protocol = cpu_to_be16(ETH_P_IPV6), @@ -105,7 +135,7 @@ static struct dst_ops ip6_dst_ops_template = { .check = ip6_dst_check, .default_advmss = ip6_default_advmss, .default_mtu = ip6_default_mtu, - .cow_metrics = dst_cow_metrics_generic, + .cow_metrics = ipv6_cow_metrics, .destroy = ip6_dst_destroy, .ifdown = ip6_dst_ifdown, .negative_advice = ip6_negative_advice, @@ -198,7 +228,6 @@ static void ip6_dst_destroy(struct dst_entry *dst) rt->rt6i_idev = NULL; in6_dev_put(idev); } - dst_destroy_metrics_generic(dst); if (peer) { rt->rt6i_peer = NULL; inet_putpeer(peer); -- cgit v1.1 From ccf434380d1a67df2dcb9113206b77d0cb0a1cef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 26 Jan 2011 18:08:02 +0000 Subject: net: fix dev_seq_next() Commit c6d14c84566d (net: Introduce for_each_netdev_rcu() iterator) added a race in dev_seq_next(). The rcu_dereference() call should be done _before_ testing the end of list, or we might return a wrong net_device if a concurrent thread changes net_device list under us. Note : discovered thanks to a sparse warning : net/core/dev.c:3919:9: error: incompatible types in comparison expression (different address spaces) Signed-off-by: Eric Dumazet CC: Paul E. McKenney Signed-off-by: David S. 
Miller --- net/core/dev.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 1b4c07f..ddd5df2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4051,12 +4051,15 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos) void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct net_device *dev = (v == SEQ_START_TOKEN) ? - first_net_device(seq_file_net(seq)) : - next_net_device((struct net_device *)v); + struct net_device *dev = v; + + if (v == SEQ_START_TOKEN) + dev = first_net_device_rcu(seq_file_net(seq)); + else + dev = next_net_device_rcu(dev); ++*pos; - return rcu_dereference(dev); + return dev; } void dev_seq_stop(struct seq_file *seq, void *v) -- cgit v1.1 From a4daad6b0923030fbd3b00a01f570e4c3eef446b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 27 Jan 2011 22:01:53 -0800 Subject: net: Pre-COW metrics for TCP. TCP is going to record metrics for the connection, so pre-COW the route metrics at route cache entry creation time. This avoids several atomic operations that have to occur if we COW the metrics after the entry reaches global visibility. Signed-off-by: David S. 
Miller --- net/ipv4/route.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 68cee35..dd57f48 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1857,6 +1857,28 @@ static unsigned int ipv4_default_mtu(const struct dst_entry *dst) return mtu; } +static void rt_init_metrics(struct rtable *rt, struct fib_info *fi) +{ + if (!(rt->fl.flags & FLOWI_FLAG_PRECOW_METRICS)) { + no_cow: + rt->fi = fi; + atomic_inc(&fi->fib_clntref); + dst_init_metrics(&rt->dst, fi->fib_metrics, true); + } else { + struct inet_peer *peer; + + if (!rt->peer) + rt_bind_peer(rt, 1); + peer = rt->peer; + if (!peer) + goto no_cow; + if (inet_metrics_new(peer)) + memcpy(peer->metrics, fi->fib_metrics, + sizeof(u32) * RTAX_MAX); + dst_init_metrics(&rt->dst, peer->metrics, false); + } +} + static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) { struct dst_entry *dst = &rt->dst; @@ -1866,9 +1888,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = FIB_RES_GW(*res); - rt->fi = fi; - atomic_inc(&fi->fib_clntref); - dst_init_metrics(dst, fi->fib_metrics, true); + rt_init_metrics(rt, fi); #ifdef CONFIG_IP_ROUTE_CLASSID dst->tclassid = FIB_RES_NH(*res).nh_tclassid; #endif -- cgit v1.1 From 6a4ddef2a3805d5b0664a94579b7a651bc202266 Mon Sep 17 00:00:00 2001 From: Thomas Jacob Date: Fri, 28 Jan 2011 19:33:13 +0100 Subject: netfilter: xt_iprange: add IPv6 match debug print code Signed-off-by: Thomas Jacob Signed-off-by: Patrick McHardy --- net/netfilter/xt_iprange.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c index 77b9ebc..d3eb5ed 100644 --- a/net/netfilter/xt_iprange.c +++ b/net/netfilter/xt_iprange.c @@ -78,15 +78,27 @@ iprange_mt6(const struct 
sk_buff *skb, struct xt_action_param *par) m = iprange_ipv6_sub(&iph->saddr, &info->src_min.in6) < 0; m |= iprange_ipv6_sub(&iph->saddr, &info->src_max.in6) > 0; m ^= !!(info->flags & IPRANGE_SRC_INV); - if (m) + if (m) { + pr_debug("src IP %pI6 NOT in range %s%pI6-%pI6\n", + &iph->saddr, + (info->flags & IPRANGE_SRC_INV) ? "(INV) " : "", + &info->src_min.in6, + &info->src_max.in6); return false; + } } if (info->flags & IPRANGE_DST) { m = iprange_ipv6_sub(&iph->daddr, &info->dst_min.in6) < 0; m |= iprange_ipv6_sub(&iph->daddr, &info->dst_max.in6) > 0; m ^= !!(info->flags & IPRANGE_DST_INV); - if (m) + if (m) { + pr_debug("dst IP %pI6 NOT in range %s%pI6-%pI6\n", + &iph->daddr, + (info->flags & IPRANGE_DST_INV) ? "(INV) " : "", + &info->dst_min.in6, + &info->dst_max.in6); return false; + } } return true; } -- cgit v1.1 From 45cbad6a1299842b5ae9a8a9c09630af063692f8 Mon Sep 17 00:00:00 2001 From: Juuso Oikarinen Date: Tue, 25 Jan 2011 12:21:22 +0200 Subject: cfg80211: Allow non-zero indexes for device specific pair-wise ciphers Some vendor specific cipher suites require non-zero key indexes for pairwise keys, but cfg80211 currently does not allow it. As validating the cipher parameters for vendor specific cipher suites is the job of the driver or hardware/firmware, change the cfg80211 to allow also non-zero pairwise key indexes for vendor specific ciphers. Signed-off-by: Juuso Oikarinen Signed-off-by: John W.
Linville --- net/wireless/util.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/wireless/util.c b/net/wireless/util.c index 4ed065d..6a750bc 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -167,12 +167,15 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, /* * Disallow pairwise keys with non-zero index unless it's WEP - * (because current deployments use pairwise WEP keys with - * non-zero indizes but 802.11i clearly specifies to use zero) + * or a vendor specific cipher (because current deployments use + * pairwise WEP keys with non-zero indices and for vendor specific + * ciphers this should be validated in the driver or hardware level + * - but 802.11i clearly specifies to use zero) */ if (pairwise && key_idx && - params->cipher != WLAN_CIPHER_SUITE_WEP40 && - params->cipher != WLAN_CIPHER_SUITE_WEP104) + ((params->cipher == WLAN_CIPHER_SUITE_TKIP) || + (params->cipher == WLAN_CIPHER_SUITE_CCMP) || + (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC))) return -EINVAL; switch (params->cipher) { -- cgit v1.1 From 6d744bacee8195c915c514409a81d470ce7b1177 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 27 Jan 2011 14:13:17 +0100 Subject: mac80211: add MCS information to radiotap This adds the MCS information we currently get from the drivers into radiotap. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/rx.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index f36d70f..7185c93 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -85,6 +85,9 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local, if (len & 1) /* padding for RX_FLAGS if necessary */ len++; + if (status->flag & RX_FLAG_HT) /* HT info */ + len += 3; + return len; } @@ -193,6 +196,20 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, rx_flags |= IEEE80211_RADIOTAP_F_RX_BADPLCP; put_unaligned_le16(rx_flags, pos); pos += 2; + + if (status->flag & RX_FLAG_HT) { + rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS); + *pos++ = IEEE80211_RADIOTAP_MCS_HAVE_MCS | + IEEE80211_RADIOTAP_MCS_HAVE_GI | + IEEE80211_RADIOTAP_MCS_HAVE_BW; + *pos = 0; + if (status->flag & RX_FLAG_SHORT_GI) + *pos |= IEEE80211_RADIOTAP_MCS_SGI; + if (status->flag & RX_FLAG_40MHZ) + *pos |= IEEE80211_RADIOTAP_MCS_BW_40; + pos++; + *pos++ = status->rate_idx; + } } /* -- cgit v1.1 From 4914b3bb7fa6badc25e77e22c47fde22b924b53f Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Thu, 27 Jan 2011 22:09:34 -0800 Subject: mac80211: Add sdata state and flags to debugfs. Signed-off-by: Ben Greear Signed-off-by: John W. 
Linville --- net/mac80211/debugfs_netdev.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'net') diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 2dabdf7..872adb8 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -81,6 +81,8 @@ static ssize_t ieee80211_if_fmt_##name( \ IEEE80211_IF_FMT(name, field, "%d\n") #define IEEE80211_IF_FMT_HEX(name, field) \ IEEE80211_IF_FMT(name, field, "%#x\n") +#define IEEE80211_IF_FMT_LHEX(name, field) \ + IEEE80211_IF_FMT(name, field, "%#lx\n") #define IEEE80211_IF_FMT_SIZE(name, field) \ IEEE80211_IF_FMT(name, field, "%zd\n") @@ -145,6 +147,8 @@ IEEE80211_IF_FILE(rc_rateidx_mask_2ghz, rc_rateidx_mask[IEEE80211_BAND_2GHZ], HEX); IEEE80211_IF_FILE(rc_rateidx_mask_5ghz, rc_rateidx_mask[IEEE80211_BAND_5GHZ], HEX); +IEEE80211_IF_FILE(flags, flags, HEX); +IEEE80211_IF_FILE(state, state, LHEX); /* STA attributes */ IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC); @@ -283,6 +287,8 @@ IEEE80211_IF_FILE(dot11MeshHWMPRootMode, static void add_sta_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted); + DEBUGFS_ADD(flags); + DEBUGFS_ADD(state); DEBUGFS_ADD(rc_rateidx_mask_2ghz); DEBUGFS_ADD(rc_rateidx_mask_5ghz); @@ -296,6 +302,8 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) static void add_ap_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted); + DEBUGFS_ADD(flags); + DEBUGFS_ADD(state); DEBUGFS_ADD(rc_rateidx_mask_2ghz); DEBUGFS_ADD(rc_rateidx_mask_5ghz); @@ -307,6 +315,8 @@ static void add_ap_files(struct ieee80211_sub_if_data *sdata) static void add_wds_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted); + DEBUGFS_ADD(flags); + DEBUGFS_ADD(state); DEBUGFS_ADD(rc_rateidx_mask_2ghz); DEBUGFS_ADD(rc_rateidx_mask_5ghz); @@ -316,12 +326,16 @@ static void add_wds_files(struct ieee80211_sub_if_data *sdata) static void add_vlan_files(struct ieee80211_sub_if_data *sdata) { 
DEBUGFS_ADD(drop_unencrypted); + DEBUGFS_ADD(flags); + DEBUGFS_ADD(state); DEBUGFS_ADD(rc_rateidx_mask_2ghz); DEBUGFS_ADD(rc_rateidx_mask_5ghz); } static void add_monitor_files(struct ieee80211_sub_if_data *sdata) { + DEBUGFS_ADD(flags); + DEBUGFS_ADD(state); } #ifdef CONFIG_MAC80211_MESH -- cgit v1.1 From efe1cf0c5743caf4daccb57b399ef63edad41c9d Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 28 Jan 2011 15:17:11 +0100 Subject: net/wireless/nl80211.c: Avoid call to genlmsg_cancel genlmsg_cancel subtracts some constants from its second argument before calling nlmsg_cancel. nlmsg_cancel then calls nlmsg_trim on the same arguments. nlmsg_trim tests for NULL before doing any computation, but a NULL second argument to genlmsg_cancel is no longer NULL due to the initial subtraction. Nothing else happens in this execution, so the call to genlmsg_cancel is simply unnecessary in this case. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression data; @@ if (data == NULL) { ... * genlmsg_cancel(..., data); ... return ...; } // Signed-off-by: Julia Lawall Signed-off-by: John W. 
Linville --- net/wireless/nl80211.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9b62710..864ddfb 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2718,7 +2718,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, NL80211_CMD_GET_MESH_CONFIG); if (!hdr) - goto nla_put_failure; + goto out; pinfoattr = nla_nest_start(msg, NL80211_ATTR_MESH_CONFIG); if (!pinfoattr) goto nla_put_failure; @@ -2759,6 +2759,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, nla_put_failure: genlmsg_cancel(msg, hdr); + out: nlmsg_free(msg); return -ENOBUFS; } @@ -2954,7 +2955,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, NL80211_CMD_GET_REG); if (!hdr) - goto nla_put_failure; + goto put_failure; NLA_PUT_STRING(msg, NL80211_ATTR_REG_ALPHA2, cfg80211_regdomain->alpha2); @@ -3001,6 +3002,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) nla_put_failure: genlmsg_cancel(msg, hdr); +put_failure: nlmsg_free(msg); err = -EMSGSIZE; out: -- cgit v1.1 From 9c150e82ac50a611237bbebd508d17f6347d577c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 28 Jan 2011 14:01:25 -0800 Subject: ipv4: Allocate fib metrics dynamically. This is the initial gateway towards super-sharing metrics if they are all set to zero for a route. Signed-off-by: David S. 
Miller --- net/ipv4/fib_semantics.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 9aff11d7..363ec39 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -152,6 +152,7 @@ static void free_fib_info_rcu(struct rcu_head *head) { struct fib_info *fi = container_of(head, struct fib_info, rcu); + kfree(fi->fib_metrics); kfree(fi); } @@ -742,6 +743,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); if (fi == NULL) goto failure; + fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + if (!fi->fib_metrics) + goto failure; fib_info_cnt++; fi->fib_net = hold_net(net); -- cgit v1.1 From 725d1e1b457dc2bbebb337677e73efe7c6d14da5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 28 Jan 2011 14:05:05 -0800 Subject: ipv4: Attach FIB info to dst_default_metrics when possible If there are no explicit metrics attached to a route, hook fi->fib_metrics up to dst_default_metrics. Signed-off-by: David S.
Miller --- net/core/dst.c | 2 +- net/ipv4/fib_semantics.c | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/dst.c b/net/core/dst.c index 5788935..c1674fd 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -164,7 +164,7 @@ int dst_discard(struct sk_buff *skb) } EXPORT_SYMBOL(dst_discard); -static const u32 dst_default_metrics[RTAX_MAX]; +const u32 dst_default_metrics[RTAX_MAX]; void *dst_alloc(struct dst_ops *ops) { diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 363ec39..48e93a5 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -152,7 +152,8 @@ static void free_fib_info_rcu(struct rcu_head *head) { struct fib_info *fi = container_of(head, struct fib_info, rcu); - kfree(fi->fib_metrics); + if (fi->fib_metrics != (u32 *) dst_default_metrics) + kfree(fi->fib_metrics); kfree(fi); } @@ -743,9 +744,12 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); if (fi == NULL) goto failure; - fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); - if (!fi->fib_metrics) - goto failure; + if (cfg->fc_mx) { + fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + if (!fi->fib_metrics) + goto failure; + } else + fi->fib_metrics = (u32 *) dst_default_metrics; fib_info_cnt++; fi->fib_net = hold_net(net); -- cgit v1.1 From b8dad61cc74b9ec71052e2a0e1c5119c65d166da Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 28 Jan 2011 14:07:16 -0800 Subject: ipv4: If fib metrics are default, no need to grab ref to FIB info. The fib metric memory in this case is static in the kernel image, so we don't need to reference count it since it's never going to go away on us. Signed-off-by: David S. 
Miller --- net/ipv4/route.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index dd57f48..b1e5d3a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1861,8 +1861,10 @@ static void rt_init_metrics(struct rtable *rt, struct fib_info *fi) { if (!(rt->fl.flags & FLOWI_FLAG_PRECOW_METRICS)) { no_cow: - rt->fi = fi; - atomic_inc(&fi->fib_clntref); + if (fi->fib_metrics != (u32 *) dst_default_metrics) { + rt->fi = fi; + atomic_inc(&fi->fib_clntref); + } dst_init_metrics(&rt->dst, fi->fib_metrics, true); } else { struct inet_peer *peer; -- cgit v1.1 From 5c77d8bb8aeb4ec6804b6c32061109ba2ea6988d Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Tue, 25 Jan 2011 21:59:26 +0000 Subject: batman-adv: Create roughly equal sized fragments The routing algorithm must know how large two fragments are to be able to decide that it is safe to merge them or if it should resubmit without waiting for the second part. When these two fragments have a too different size, it is not possible to guess right in every situation. The user could easily configure the MTU of the attached cards so that one fragment is forwarded and the other one is added to the fragments table to wait for the missing part. For even sized packets, it is possible to split it so that the resulting packages are equal sized by ignoring the old non-fragment header at the beginning of the original packet. This still creates different sized fragments for uneven sized packets. 
Reported-by: Russell Senior Reported-by: Marek Lindner Signed-off-by: Sven Eckelmann --- net/batman-adv/unicast.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index ee41fef..811f7fc 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -224,7 +224,7 @@ int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, struct unicast_frag_packet *frag1, *frag2; int uc_hdr_len = sizeof(struct unicast_packet); int ucf_hdr_len = sizeof(struct unicast_frag_packet); - int data_len = skb->len; + int data_len = skb->len - uc_hdr_len; if (!bat_priv->primary_if) goto dropped; @@ -232,10 +232,11 @@ int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, frag_skb = dev_alloc_skb(data_len - (data_len / 2) + ucf_hdr_len); if (!frag_skb) goto dropped; + skb_reserve(frag_skb, ucf_hdr_len); unicast_packet = (struct unicast_packet *) skb->data; memcpy(&tmp_uc, unicast_packet, uc_hdr_len); - skb_split(skb, frag_skb, data_len / 2); + skb_split(skb, frag_skb, data_len / 2 + uc_hdr_len); if (my_skb_head_push(skb, ucf_hdr_len - uc_hdr_len) < 0 || my_skb_head_push(frag_skb, ucf_hdr_len) < 0) -- cgit v1.1 From ae361ce19fa135035c6b83ac1f07090b72fd4b8f Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Tue, 25 Jan 2011 22:02:31 +0000 Subject: batman-adv: Calculate correct size for merged packets The routing algorithm must be able to decide if a fragment can be merged with the missing part and still be passed to a forwarding interface. The fragments can only differ by one byte in case that the original payload had an uneven length. In that situation the sender has to inform all possible receivers that the tail is one byte longer using the flag UNI_FRAG_LARGETAIL. The combination of UNI_FRAG_LARGETAIL and UNI_FRAG_HEAD flag makes it possible to calculate the correct length for even and uneven sized payloads. 
The original formula missed to add the unicast header at all and forgot to remove the fragment header of the second fragment. This made the results highly unreliable and only useful for machines with large differences between the configured MTUs. Reported-by: Russell Senior Reported-by: Marek Lindner Signed-off-by: Sven Eckelmann --- net/batman-adv/packet.h | 1 + net/batman-adv/routing.c | 2 +- net/batman-adv/unicast.c | 8 ++++++-- net/batman-adv/unicast.h | 23 +++++++++++++++++++++++ 4 files changed, 31 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 2284e81..03ce0d3 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -50,6 +50,7 @@ /* fragmentation defines */ #define UNI_FRAG_HEAD 0x01 +#define UNI_FRAG_LARGETAIL 0x02 struct batman_packet { uint8_t packet_type; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 8828edd..a8cd389 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -1193,7 +1193,7 @@ int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, dstaddr); if (unicast_packet->packet_type == BAT_UNICAST_FRAG && - 2 * skb->len - hdr_size <= batman_if->net_dev->mtu) { + frag_can_reassemble(skb, batman_if->net_dev->mtu)) { ret = frag_reassemble_skb(skb, bat_priv, &new_skb); diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 811f7fc..fc77079 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -225,6 +225,7 @@ int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, int uc_hdr_len = sizeof(struct unicast_packet); int ucf_hdr_len = sizeof(struct unicast_frag_packet); int data_len = skb->len - uc_hdr_len; + int large_tail = 0; if (!bat_priv->primary_if) goto dropped; @@ -254,8 +255,11 @@ int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, memcpy(frag1->orig, bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); memcpy(frag2, frag1, sizeof(struct 
unicast_frag_packet)); - frag1->flags |= UNI_FRAG_HEAD; - frag2->flags &= ~UNI_FRAG_HEAD; + if (data_len & 1) + large_tail = UNI_FRAG_LARGETAIL; + + frag1->flags = UNI_FRAG_HEAD | large_tail; + frag2->flags = large_tail; frag1->seqno = htons((uint16_t)atomic_inc_return( &batman_if->frag_seqno)); diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h index e32b786..e7211c2 100644 --- a/net/batman-adv/unicast.h +++ b/net/batman-adv/unicast.h @@ -22,6 +22,8 @@ #ifndef _NET_BATMAN_ADV_UNICAST_H_ #define _NET_BATMAN_ADV_UNICAST_H_ +#include "packet.h" + #define FRAG_TIMEOUT 10000 /* purge frag list entrys after time in ms */ #define FRAG_BUFFER_SIZE 6 /* number of list elements in buffer */ @@ -32,4 +34,25 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv); int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, struct batman_if *batman_if, uint8_t dstaddr[]); +static inline int frag_can_reassemble(struct sk_buff *skb, int mtu) +{ + struct unicast_frag_packet *unicast_packet; + int uneven_correction = 0; + unsigned int merged_size; + + unicast_packet = (struct unicast_frag_packet *)skb->data; + + if (unicast_packet->flags & UNI_FRAG_LARGETAIL) { + if (unicast_packet->flags & UNI_FRAG_HEAD) + uneven_correction = 1; + else + uneven_correction = -1; + } + + merged_size = (skb->len - sizeof(struct unicast_frag_packet)) * 2; + merged_size += sizeof(struct unicast_packet) + uneven_correction; + + return merged_size <= mtu; +} + #endif /* _NET_BATMAN_ADV_UNICAST_H_ */ -- cgit v1.1 From 74ef115359f5beb565baddfb250f264d9177c108 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 29 Dec 2010 16:15:19 +0000 Subject: batman-adv: remove unused parameters Some function parameters are obsolete now and can be removed. 
Reported-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Sven Eckelmann --- net/batman-adv/originator.c | 2 +- net/batman-adv/routing.c | 15 ++++++--------- net/batman-adv/routing.h | 3 +-- net/batman-adv/send.c | 4 ++-- 4 files changed, 10 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 6b7fb6b..3c5c889 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -247,7 +247,7 @@ static bool purge_orig_node(struct bat_priv *bat_priv, orig_node->hna_buff_len); /* update bonding candidates, we could have lost * some candidates. */ - update_bonding_candidates(bat_priv, orig_node); + update_bonding_candidates(orig_node); } } diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index a8cd389..e946dc9 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -433,8 +433,7 @@ static char count_real_packets(struct ethhdr *ethhdr, } /* copy primary address for bonding */ -static void mark_bonding_address(struct bat_priv *bat_priv, - struct orig_node *orig_node, +static void mark_bonding_address(struct orig_node *orig_node, struct orig_node *orig_neigh_node, struct batman_packet *batman_packet) @@ -447,8 +446,7 @@ static void mark_bonding_address(struct bat_priv *bat_priv, } /* mark possible bond.candidates in the neighbor list */ -void update_bonding_candidates(struct bat_priv *bat_priv, - struct orig_node *orig_node) +void update_bonding_candidates(struct orig_node *orig_node) { int candidates; int interference_candidate; @@ -730,9 +728,8 @@ void receive_bat_packet(struct ethhdr *ethhdr, update_orig(bat_priv, orig_node, ethhdr, batman_packet, if_incoming, hna_buff, hna_buff_len, is_duplicate); - mark_bonding_address(bat_priv, orig_node, - orig_neigh_node, batman_packet); - update_bonding_candidates(bat_priv, orig_node); + mark_bonding_address(orig_node, orig_neigh_node, batman_packet); + update_bonding_candidates(orig_node); /* is 
single hop (direct) neighbor */ if (is_single_hop_neigh) { @@ -866,7 +863,7 @@ static int recv_my_icmp_packet(struct bat_priv *bat_priv, } static int recv_icmp_ttl_exceeded(struct bat_priv *bat_priv, - struct sk_buff *skb, size_t icmp_len) + struct sk_buff *skb) { struct orig_node *orig_node; struct icmp_packet *icmp_packet; @@ -978,7 +975,7 @@ int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if) /* TTL exceeded */ if (icmp_packet->ttl < 2) - return recv_icmp_ttl_exceeded(bat_priv, skb, hdr_size); + return recv_icmp_ttl_exceeded(bat_priv, skb); ret = NET_RX_DROP; diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index f108f23..725cc38 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -42,7 +42,6 @@ int recv_vis_packet(struct sk_buff *skb, struct batman_if *recv_if); int recv_bat_packet(struct sk_buff *skb, struct batman_if *recv_if); struct neigh_node *find_router(struct bat_priv *bat_priv, struct orig_node *orig_node, struct batman_if *recv_if); -void update_bonding_candidates(struct bat_priv *bat_priv, - struct orig_node *orig_node); +void update_bonding_candidates(struct orig_node *orig_node); #endif /* _NET_BATMAN_ADV_ROUTING_H_ */ diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index b89b9f7..77f8297 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -49,7 +49,7 @@ static unsigned long own_send_time(struct bat_priv *bat_priv) } /* when do we schedule a forwarded packet to be sent */ -static unsigned long forward_send_time(struct bat_priv *bat_priv) +static unsigned long forward_send_time(void) { return jiffies + msecs_to_jiffies(random32() % (JITTER/2)); } @@ -356,7 +356,7 @@ void schedule_forward_packet(struct orig_node *orig_node, else batman_packet->flags &= ~DIRECTLINK; - send_time = forward_send_time(bat_priv); + send_time = forward_send_time(); add_bat_packet_to_list(bat_priv, (unsigned char *)batman_packet, sizeof(struct batman_packet) + hna_buff_len, -- cgit v1.1 From 
633979b43f23d776f6fb757f0f3d6d8089ab57b1 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 27 Jan 2011 13:10:23 +0100 Subject: batman-adv: Remove dangling declaration of hash_remove_element Signed-off-by: Sven Eckelmann --- net/batman-adv/hash.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'net') diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h index 09216ad..2f508e6 100644 --- a/net/batman-adv/hash.h +++ b/net/batman-adv/hash.h @@ -49,11 +49,6 @@ struct hashtable_t { /* allocates and clears the hash */ struct hashtable_t *hash_new(int size); -/* remove element if you already found the element you want to delete and don't - * need the overhead to find it again with hash_remove(). But usually, you - * don't want to use this function, as it fiddles with hash-internals. */ -void *hash_remove_element(struct hashtable_t *hash, struct element_t *elem); - /* free only the hashtable and the hash itself. */ void hash_destroy(struct hashtable_t *hash); -- cgit v1.1 From 335f94c981248e9f326986e0ac8d31f187ffeed0 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 27 Jan 2011 13:12:04 +0100 Subject: batman-adv: Remove unused definitions Signed-off-by: Sven Eckelmann --- net/batman-adv/main.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'net') diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 65106fb..c1ace85 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -22,9 +22,6 @@ #ifndef _NET_BATMAN_ADV_MAIN_H_ #define _NET_BATMAN_ADV_MAIN_H_ -/* Kernel Programming */ -#define LINUX - #define DRIVER_AUTHOR "Marek Lindner , " \ "Simon Wunderlich " #define DRIVER_DESC "B.A.T.M.A.N. 
advanced" @@ -54,7 +51,6 @@ #define NUM_WORDS (TQ_LOCAL_WINDOW_SIZE / WORD_BIT_SIZE) -#define PACKBUFF_SIZE 2000 #define LOG_BUF_LEN 8192 /* has to be a power of 2 */ #define VIS_INTERVAL 5000 /* 5 seconds */ @@ -96,15 +92,11 @@ #define DBG_ROUTES 2 /* route or hna added / changed / deleted */ #define DBG_ALL 3 -#define LOG_BUF_LEN 8192 /* has to be a power of 2 */ - /* * Vis */ -/* #define VIS_SUBCLUSTERS_DISABLED */ - /* * Kernel headers */ @@ -158,13 +150,6 @@ static inline void bat_dbg(char type __always_unused, } #endif -#define bat_warning(net_dev, fmt, arg...) \ - do { \ - struct net_device *_netdev = (net_dev); \ - struct bat_priv *_batpriv = netdev_priv(_netdev); \ - bat_dbg(DBG_ALL, _batpriv, fmt, ## arg); \ - pr_warning("%s: " fmt, _netdev->name, ## arg); \ - } while (0) #define bat_info(net_dev, fmt, arg...) \ do { \ struct net_device *_netdev = (net_dev); \ -- cgit v1.1 From fb86d7648ffdfc8778db2cd70d4bc5c6093e04c5 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 27 Jan 2011 13:16:08 +0100 Subject: batman-adv: Remove declaration of batman_skb_recv batman_skb_recv can be defined in hard-interface.c as static because it is never used outside of that file. 
Signed-off-by: Sven Eckelmann --- net/batman-adv/hard-interface.c | 11 +++++++++-- net/batman-adv/hard-interface.h | 4 ---- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 4f95777..8a9cf7a 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -34,6 +34,12 @@ /* protect update critical side of if_list - but not the content */ static DEFINE_SPINLOCK(if_list_lock); + +static int batman_skb_recv(struct sk_buff *skb, + struct net_device *dev, + struct packet_type *ptype, + struct net_device *orig_dev); + static void hardif_free_rcu(struct rcu_head *rcu) { struct batman_if *batman_if; @@ -549,8 +555,9 @@ out: /* receive a packet with the batman ethertype coming on a hard * interface */ -int batman_skb_recv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *ptype, struct net_device *orig_dev) +static int batman_skb_recv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *ptype, + struct net_device *orig_dev) { struct bat_priv *bat_priv; struct batman_packet *batman_packet; diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index 30ec3b8..a42f5a4 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -35,10 +35,6 @@ struct batman_if *get_batman_if_by_netdev(struct net_device *net_dev); int hardif_enable_interface(struct batman_if *batman_if, char *iface_name); void hardif_disable_interface(struct batman_if *batman_if); void hardif_remove_interfaces(void); -int batman_skb_recv(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *ptype, - struct net_device *orig_dev); int hardif_min_mtu(struct net_device *soft_iface); void update_min_mtu(struct net_device *soft_iface); -- cgit v1.1 From 1299bdaa1cb522de940d912f661bef59b9a39dd7 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 27 Jan 2011 13:48:54 +0100 Subject: batman-adv: Remove 
unused variables Signed-off-by: Sven Eckelmann --- net/batman-adv/bat_debugfs.c | 4 +--- net/batman-adv/routing.c | 7 ------- 2 files changed, 1 insertion(+), 10 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_debugfs.c b/net/batman-adv/bat_debugfs.c index 0ae81d0..d36d305 100644 --- a/net/batman-adv/bat_debugfs.c +++ b/net/batman-adv/bat_debugfs.c @@ -52,7 +52,6 @@ static void emit_log_char(struct debug_log *debug_log, char c) static int fdebug_log(struct debug_log *debug_log, char *fmt, ...) { - int printed_len; va_list args; static char debug_log_buf[256]; char *p; @@ -62,8 +61,7 @@ static int fdebug_log(struct debug_log *debug_log, char *fmt, ...) spin_lock_bh(&debug_log->lock); va_start(args, fmt); - printed_len = vscnprintf(debug_log_buf, sizeof(debug_log_buf), - fmt, args); + vscnprintf(debug_log_buf, sizeof(debug_log_buf), fmt, args); va_end(args); for (p = debug_log_buf; *p != 0; p++) diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index e946dc9..3b7e2f7 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -807,13 +807,11 @@ static int recv_my_icmp_packet(struct bat_priv *bat_priv, { struct orig_node *orig_node; struct icmp_packet_rr *icmp_packet; - struct ethhdr *ethhdr; struct batman_if *batman_if; int ret; uint8_t dstaddr[ETH_ALEN]; icmp_packet = (struct icmp_packet_rr *)skb->data; - ethhdr = (struct ethhdr *)skb_mac_header(skb); /* add data to device queue */ if (icmp_packet->msg_type != ECHO_REQUEST) { @@ -845,7 +843,6 @@ static int recv_my_icmp_packet(struct bat_priv *bat_priv, return NET_RX_DROP; icmp_packet = (struct icmp_packet_rr *)skb->data; - ethhdr = (struct ethhdr *)skb_mac_header(skb); memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); memcpy(icmp_packet->orig, @@ -867,13 +864,11 @@ static int recv_icmp_ttl_exceeded(struct bat_priv *bat_priv, { struct orig_node *orig_node; struct icmp_packet *icmp_packet; - struct ethhdr *ethhdr; struct batman_if *batman_if; int ret; uint8_t 
dstaddr[ETH_ALEN]; icmp_packet = (struct icmp_packet *)skb->data; - ethhdr = (struct ethhdr *)skb_mac_header(skb); /* send TTL exceeded if packet is an echo request (traceroute) */ if (icmp_packet->msg_type != ECHO_REQUEST) { @@ -906,7 +901,6 @@ static int recv_icmp_ttl_exceeded(struct bat_priv *bat_priv, return NET_RX_DROP; icmp_packet = (struct icmp_packet *) skb->data; - ethhdr = (struct ethhdr *)skb_mac_header(skb); memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); memcpy(icmp_packet->orig, @@ -998,7 +992,6 @@ int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if) return NET_RX_DROP; icmp_packet = (struct icmp_packet_rr *)skb->data; - ethhdr = (struct ethhdr *)skb_mac_header(skb); /* decrement ttl */ icmp_packet->ttl--; -- cgit v1.1 From 64afe35398269577ef9809474dd7dc0e5d265176 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 27 Jan 2011 10:38:15 +0100 Subject: batman-adv: Update copyright years Signed-off-by: Sven Eckelmann --- net/batman-adv/Makefile | 2 +- net/batman-adv/aggregation.c | 2 +- net/batman-adv/aggregation.h | 2 +- net/batman-adv/bat_debugfs.c | 2 +- net/batman-adv/bat_debugfs.h | 2 +- net/batman-adv/bat_sysfs.c | 2 +- net/batman-adv/bat_sysfs.h | 2 +- net/batman-adv/bitarray.c | 2 +- net/batman-adv/bitarray.h | 2 +- net/batman-adv/gateway_client.c | 2 +- net/batman-adv/gateway_client.h | 2 +- net/batman-adv/gateway_common.c | 2 +- net/batman-adv/gateway_common.h | 2 +- net/batman-adv/hard-interface.c | 2 +- net/batman-adv/hard-interface.h | 2 +- net/batman-adv/hash.c | 2 +- net/batman-adv/hash.h | 2 +- net/batman-adv/icmp_socket.c | 2 +- net/batman-adv/icmp_socket.h | 2 +- net/batman-adv/main.c | 2 +- net/batman-adv/main.h | 2 +- net/batman-adv/originator.c | 2 +- net/batman-adv/originator.h | 2 +- net/batman-adv/packet.h | 2 +- net/batman-adv/ring_buffer.c | 2 +- net/batman-adv/ring_buffer.h | 2 +- net/batman-adv/routing.c | 2 +- net/batman-adv/routing.h | 2 +- net/batman-adv/send.c | 2 +- net/batman-adv/send.h | 2 +- 
net/batman-adv/soft-interface.c | 2 +- net/batman-adv/soft-interface.h | 2 +- net/batman-adv/translation-table.c | 2 +- net/batman-adv/translation-table.h | 2 +- net/batman-adv/types.h | 2 +- net/batman-adv/unicast.c | 2 +- net/batman-adv/unicast.h | 2 +- net/batman-adv/vis.c | 2 +- net/batman-adv/vis.h | 2 +- 39 files changed, 39 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index d936aec..2de93d0 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -1,5 +1,5 @@ # -# Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: +# Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: # # Marek Lindner, Simon Wunderlich # diff --git a/net/batman-adv/aggregation.c b/net/batman-adv/aggregation.c index 3850a3e..1997725 100644 --- a/net/batman-adv/aggregation.c +++ b/net/batman-adv/aggregation.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/aggregation.h b/net/batman-adv/aggregation.h index 71a91b3..6ce305b 100644 --- a/net/batman-adv/aggregation.h +++ b/net/batman-adv/aggregation.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/bat_debugfs.c b/net/batman-adv/bat_debugfs.c index d36d305..0e9d435 100644 --- a/net/batman-adv/bat_debugfs.c +++ b/net/batman-adv/bat_debugfs.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/bat_debugfs.h b/net/batman-adv/bat_debugfs.h index 72df532..bc9cda3 100644 --- a/net/batman-adv/bat_debugfs.h +++ b/net/batman-adv/bat_debugfs.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2010-2011 B.A.T.M.A.N. 
contributors: * * Marek Lindner * diff --git a/net/batman-adv/bat_sysfs.c b/net/batman-adv/bat_sysfs.c index cd7bb51..f7b93a0 100644 --- a/net/batman-adv/bat_sysfs.c +++ b/net/batman-adv/bat_sysfs.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/bat_sysfs.h b/net/batman-adv/bat_sysfs.h index 7f186c0..02f1fa7 100644 --- a/net/batman-adv/bat_sysfs.h +++ b/net/batman-adv/bat_sysfs.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index bbcd8f7..ad2ca92 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2006-2011 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h index ac54017..769c246 100644 --- a/net/batman-adv/bitarray.h +++ b/net/batman-adv/bitarray.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2006-2011 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 0065ffb..429a013 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index 4585e65..2aa4391 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2009-2011 B.A.T.M.A.N. 
contributors: * * Marek Lindner * diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index b962982..50d3a59 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h index 5e728d0..55e527a 100644 --- a/net/batman-adv/gateway_common.h +++ b/net/batman-adv/gateway_common.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 8a9cf7a..f2131f4 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index a42f5a4..ad19543 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c index 26e623e..fa26939 100644 --- a/net/batman-adv/hash.c +++ b/net/batman-adv/hash.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2006-2011 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h index 2f508e6..eae2440 100644 --- a/net/batman-adv/hash.h +++ b/net/batman-adv/hash.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2006-2011 B.A.T.M.A.N. 
contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index ecf6d7f..5e86d6f 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h index bf9b348..08b1859 100644 --- a/net/batman-adv/icmp_socket.h +++ b/net/batman-adv/icmp_socket.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index b827f6a..dc9248d 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index c1ace85..e235d7b 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 3c5c889..54863c9 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2009-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index d474ceb..8019fbd 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 03ce0d3..e757187 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/ring_buffer.c b/net/batman-adv/ring_buffer.c index defd37c..5bb6a61 100644 --- a/net/batman-adv/ring_buffer.c +++ b/net/batman-adv/ring_buffer.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/ring_buffer.h b/net/batman-adv/ring_buffer.h index 6b0cb9a..0395b27 100644 --- a/net/batman-adv/ring_buffer.h +++ b/net/batman-adv/ring_buffer.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 3b7e2f7..028f739 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index 725cc38..ceeca6f 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 77f8297..7cc620e 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. 
contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index c4cefa8..bc53ade 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index e89ede1..145e0f7 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index 02b7733..e7b0e1a 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index a633b5a4..f6917dd 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index 10c4c5c..a4f3a37 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index bf3f6f5..7270405 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2010 B.A.T.M.A.N. 
contributors: + * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index fc77079..cbf022c 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors: * * Andreas Langer * diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h index e7211c2..8897308 100644 --- a/net/batman-adv/unicast.h +++ b/net/batman-adv/unicast.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors: * * Andreas Langer * diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index cd4c423..a77b773 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2008-2011 B.A.T.M.A.N. contributors: * * Simon Wunderlich * diff --git a/net/batman-adv/vis.h b/net/batman-adv/vis.h index 2c3b330..31b820d 100644 --- a/net/batman-adv/vis.h +++ b/net/batman-adv/vis.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2008-2010 B.A.T.M.A.N. contributors: + * Copyright (C) 2008-2011 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * -- cgit v1.1 From 8c7914dec29f39a6a8ca348a5eeace40a59be65d Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Tue, 1 Feb 2011 00:28:59 +0530 Subject: mac80211: disable power save if an infra AP vif exists PS should not be enabled if an infra AP vif exists in the interface list. So while recalculating PS, AP vif type should be taken into account. Reviewed-by: Johannes Berg Signed-off-by: Rajkumar Manoharan Signed-off-by: John W. 
Linville --- net/mac80211/mlme.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 3221069..dfa752e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -601,6 +601,14 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) continue; + if (sdata->vif.type == NL80211_IFTYPE_AP) { + /* If an AP vif is found, then disable PS + * by setting the count to zero thereby setting + * ps_sdata to NULL. + */ + count = 0; + break; + } if (sdata->vif.type != NL80211_IFTYPE_STATION) continue; found = sdata; -- cgit v1.1 From 5b4704419cbd0b7597a91c19f9e8e8b17c1af071 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 31 Jan 2011 16:10:03 -0800 Subject: ipv4: Remember FIB alias list head and table in lookup results. This will be used later to implement fib_select_default() in a completely generic manner, instead of the current situation where the default route is re-looked up in the TRIE/HASH table and then the available aliases are analyzed. Signed-off-by: David S. 
Miller --- net/ipv4/fib_hash.c | 2 +- net/ipv4/fib_lookup.h | 2 +- net/ipv4/fib_semantics.c | 7 +++++-- net/ipv4/fib_trie.c | 8 ++++---- 4 files changed, 11 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index b3acb04..0a88866 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -288,7 +288,7 @@ int fib_table_lookup(struct fib_table *tb, if (f->fn_key != k) continue; - err = fib_semantic_match(&f->fn_alias, + err = fib_semantic_match(tb, &f->fn_alias, flp, res, fz->fz_order, fib_flags); if (err <= 0) diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index c079cc0..d5c40d8 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -25,7 +25,7 @@ static inline void fib_alias_accessed(struct fib_alias *fa) } /* Exported by fib_semantics.c */ -extern int fib_semantic_match(struct list_head *head, +extern int fib_semantic_match(struct fib_table *tb, struct list_head *head, const struct flowi *flp, struct fib_result *res, int prefixlen, int fib_flags); extern void fib_release_info(struct fib_info *); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 48e93a5..1bf6fb9 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -889,8 +889,9 @@ failure: } /* Note! fib_semantic_match intentionally uses RCU list functions. 
*/ -int fib_semantic_match(struct list_head *head, const struct flowi *flp, - struct fib_result *res, int prefixlen, int fib_flags) +int fib_semantic_match(struct fib_table *tb, struct list_head *head, + const struct flowi *flp, struct fib_result *res, + int prefixlen, int fib_flags) { struct fib_alias *fa; int nh_sel = 0; @@ -954,6 +955,8 @@ out_fill_res: res->type = fa->fa_type; res->scope = fa->fa_scope; res->fi = fa->fa_info; + res->table = tb; + res->fa_head = head; if (!(fib_flags & FIB_LOOKUP_NOREF)) atomic_inc(&res->fi->fib_clntref); return 0; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 0f28034..8cee5c8 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1340,7 +1340,7 @@ err: } /* should be called with rcu_read_lock */ -static int check_leaf(struct trie *t, struct leaf *l, +static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, t_key key, const struct flowi *flp, struct fib_result *res, int fib_flags) { @@ -1356,7 +1356,7 @@ static int check_leaf(struct trie *t, struct leaf *l, if (l->key != (key & ntohl(mask))) continue; - err = fib_semantic_match(&li->falh, flp, res, plen, fib_flags); + err = fib_semantic_match(tb, &li->falh, flp, res, plen, fib_flags); #ifdef CONFIG_IP_FIB_TRIE_STATS if (err <= 0) @@ -1398,7 +1398,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, /* Just a leaf? */ if (IS_LEAF(n)) { - ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags); + ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags); goto found; } @@ -1423,7 +1423,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, } if (IS_LEAF(n)) { - ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags); + ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags); if (ret > 0) goto backtrace; goto found; -- cgit v1.1 From 0c838ff1ade71162775afffd9e5c6478a60bdca6 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Mon, 31 Jan 2011 16:16:50 -0800 Subject: ipv4: Consolidate all default route selection implementations. Both fib_trie and fib_hash have a local implementation of fib_table_select_default(). This is completely unnecessary code duplication. Since we now remember the fib_table and the head of the fib alias list of the default route, we can implement one single generic version of this routine. Looking at the fib_hash implementation you may get the impression that it's possible for there to be multiple top-level routes in the table for the default route. The truth is, it isn't, the insert code will only allow one entry to exist in the zero prefix hash table, because all keys evaluate to zero and all keys in a hash table must be unique. Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 15 ---------- net/ipv4/fib_hash.c | 72 ---------------------------------------------- net/ipv4/fib_semantics.c | 56 ++++++++++++++++++++++++++++++++++++ net/ipv4/fib_trie.c | 74 ------------------------------------------------ net/ipv4/route.c | 2 +- 5 files changed, 57 insertions(+), 162 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 1d2cdd4..930768b 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -114,21 +114,6 @@ struct fib_table *fib_get_table(struct net *net, u32 id) } #endif /* CONFIG_IP_MULTIPLE_TABLES */ -void fib_select_default(struct net *net, - const struct flowi *flp, struct fib_result *res) -{ - struct fib_table *tb; - int table = RT_TABLE_MAIN; -#ifdef CONFIG_IP_MULTIPLE_TABLES - if (res->r == NULL || res->r->action != FR_ACT_TO_TBL) - return; - table = res->r->table; -#endif - tb = fib_get_table(net, table); - if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) - fib_table_select_default(tb, flp, res); -} - static void fib_flush(struct net *net) { int flushed = 0; diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 0a88866..fadb602 100644 --- 
a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -302,78 +302,6 @@ out: return err; } -void fib_table_select_default(struct fib_table *tb, - const struct flowi *flp, struct fib_result *res) -{ - int order, last_idx; - struct hlist_node *node; - struct fib_node *f; - struct fib_info *fi = NULL; - struct fib_info *last_resort; - struct fn_hash *t = (struct fn_hash *)tb->tb_data; - struct fn_zone *fz = t->fn_zones[0]; - struct hlist_head *head; - - if (fz == NULL) - return; - - last_idx = -1; - last_resort = NULL; - order = -1; - - rcu_read_lock(); - head = rcu_dereference(fz->fz_hash); - hlist_for_each_entry_rcu(f, node, head, fn_hash) { - struct fib_alias *fa; - - list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) { - struct fib_info *next_fi = fa->fa_info; - - if (fa->fa_scope != res->scope || - fa->fa_type != RTN_UNICAST) - continue; - - if (next_fi->fib_priority > res->fi->fib_priority) - break; - if (!next_fi->fib_nh[0].nh_gw || - next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) - continue; - - fib_alias_accessed(fa); - - if (fi == NULL) { - if (next_fi != res->fi) - break; - } else if (!fib_detect_death(fi, order, &last_resort, - &last_idx, tb->tb_default)) { - fib_result_assign(res, fi); - tb->tb_default = order; - goto out; - } - fi = next_fi; - order++; - } - } - - if (order <= 0 || fi == NULL) { - tb->tb_default = -1; - goto out; - } - - if (!fib_detect_death(fi, order, &last_resort, &last_idx, - tb->tb_default)) { - fib_result_assign(res, fi); - tb->tb_default = order; - goto out; - } - - if (last_idx >= 0) - fib_result_assign(res, last_resort); - tb->tb_default = last_idx; -out: - rcu_read_unlock(); -} - /* Insert node F to FZ. 
*/ static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f) { diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 1bf6fb9..b15857d 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1136,6 +1136,62 @@ int fib_sync_down_dev(struct net_device *dev, int force) return ret; } +/* Must be invoked inside of an RCU protected region; the caller holds and releases the RCU read lock, so nothing is unlocked here. */ +void fib_select_default(struct fib_result *res) +{ + struct fib_info *fi = NULL, *last_resort = NULL; + struct list_head *fa_head = res->fa_head; + struct fib_table *tb = res->table; + int order = -1, last_idx = -1; + struct fib_alias *fa; + + list_for_each_entry_rcu(fa, fa_head, fa_list) { + struct fib_info *next_fi = fa->fa_info; + + if (fa->fa_scope != res->scope || + fa->fa_type != RTN_UNICAST) + continue; + + if (next_fi->fib_priority > res->fi->fib_priority) + break; + if (!next_fi->fib_nh[0].nh_gw || + next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) + continue; + + fib_alias_accessed(fa); + + if (fi == NULL) { + if (next_fi != res->fi) + break; + } else if (!fib_detect_death(fi, order, &last_resort, + &last_idx, tb->tb_default)) { + fib_result_assign(res, fi); + tb->tb_default = order; + goto out; + } + fi = next_fi; + order++; + } + + if (order <= 0 || fi == NULL) { + tb->tb_default = -1; + goto out; + } + + if (!fib_detect_death(fi, order, &last_resort, &last_idx, + tb->tb_default)) { + fib_result_assign(res, fi); + tb->tb_default = order; + goto out; + } + + if (last_idx >= 0) + fib_result_assign(res, last_resort); + tb->tb_default = last_idx; +out: + return; +} + #ifdef CONFIG_IP_ROUTE_MULTIPATH /* diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 8cee5c8..16d589c 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1802,80 +1802,6 @@ void fib_free_table(struct fib_table *tb) kfree(tb); } -void fib_table_select_default(struct fib_table *tb, - const struct flowi *flp, - struct fib_result *res) -{ - struct trie *t = (struct trie *) tb->tb_data; 
- int order, last_idx; - struct fib_info *fi = NULL; - struct fib_info *last_resort; - struct fib_alias *fa = NULL; - struct list_head *fa_head; - struct leaf *l; - - last_idx = -1; - last_resort = NULL; - order = -1; - - rcu_read_lock(); - - l = fib_find_node(t, 0); - if (!l) - goto out; - - fa_head = get_fa_head(l, 0); - if (!fa_head) - goto out; - - if (list_empty(fa_head)) - goto out; - - list_for_each_entry_rcu(fa, fa_head, fa_list) { - struct fib_info *next_fi = fa->fa_info; - - if (fa->fa_scope != res->scope || - fa->fa_type != RTN_UNICAST) - continue; - - if (next_fi->fib_priority > res->fi->fib_priority) - break; - if (!next_fi->fib_nh[0].nh_gw || - next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) - continue; - - fib_alias_accessed(fa); - - if (fi == NULL) { - if (next_fi != res->fi) - break; - } else if (!fib_detect_death(fi, order, &last_resort, - &last_idx, tb->tb_default)) { - fib_result_assign(res, fi); - tb->tb_default = order; - goto out; - } - fi = next_fi; - order++; - } - if (order <= 0 || fi == NULL) { - tb->tb_default = -1; - goto out; - } - - if (!fib_detect_death(fi, order, &last_resort, &last_idx, - tb->tb_default)) { - fib_result_assign(res, fi); - tb->tb_default = order; - goto out; - } - if (last_idx >= 0) - fib_result_assign(res, last_resort); - tb->tb_default = last_idx; -out: - rcu_read_unlock(); -} - static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b1e5d3a..242a3de 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2711,7 +2711,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, else #endif if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif) - fib_select_default(net, &fl, &res); + fib_select_default(&res); if (!fl.fl4_src) fl.fl4_src = FIB_RES_PREFSRC(res); -- cgit v1.1 From a7b4f989a629493bb4ec4a354def784d440b32c4 Mon Sep 17 00:00:00 2001 From: Jozsef 
Kadlecsik Date: Tue, 1 Feb 2011 15:28:35 +0100 Subject: netfilter: ipset: IP set core support The patch adds the IP set core support to the kernel. The IP set core implements a netlink (nfnetlink) based protocol by which one can create, destroy, flush, rename, swap, list, save, restore sets, and add, delete, test elements from userspace. For simplicity and backward compatibility (and so as not to force ip(6)tables to be linked with a netlink library), a small getsockopt-based protocol is also kept in order to communicate with the ip(6)tables match and target. The netlink protocol passes all u16, etc. values in network order with NLA_F_NET_BYTEORDER flag. The protocol enforces the proper use of the NLA_F_NESTED and NLA_F_NET_BYTEORDER flags. For other kernel subsystems (netfilter match and target) the API contains the functions to add, delete and test elements in sets and the required calls to get/put references to the sets before those operations can be performed. The set types (which are implemented in independent modules) are stored in a simple RCU protected list. A set type may have variants: for example without timeout or with timeout support, for IPv4 or for IPv6. The sets (i.e. the pointers to the sets) are stored in an array. The sets are identified by their index in the array, which makes swapping of sets easy and fast. The array is protected indirectly by the nfnl mutex from nfnetlink. The content of each set is protected by the rwlock of the set. There are functional differences between the add/del/test functions for the kernel and userspace: - kernel add/del/test: works on the current packet (i.e. one element) - kernel test: may trigger an "add" operation in order to fill out unspecified parts of the element from the packet (like MAC address) - userspace add/del: works on the netlink message and thus possibly on multiple elements from the IPSET_ATTR_ADT container attribute. 
- userspace add: may trigger resizing of a set Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/Kconfig | 2 + net/netfilter/Makefile | 3 + net/netfilter/ipset/Kconfig | 26 + net/netfilter/ipset/Makefile | 8 + net/netfilter/ipset/ip_set_core.c | 1662 ++++++++++++++++++++++++++++++++++ net/netfilter/ipset/ip_set_getport.c | 136 +++ net/netfilter/ipset/pfxlen.c | 291 ++++++ 7 files changed, 2128 insertions(+) create mode 100644 net/netfilter/ipset/Kconfig create mode 100644 net/netfilter/ipset/Makefile create mode 100644 net/netfilter/ipset/ip_set_core.c create mode 100644 net/netfilter/ipset/ip_set_getport.c create mode 100644 net/netfilter/ipset/pfxlen.c (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index faf7412..351abf8 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -1052,4 +1052,6 @@ endif # NETFILTER_XTABLES endmenu +source "net/netfilter/ipset/Kconfig" + source "net/netfilter/ipvs/Kconfig" diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 9ae6878..510b586 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -105,5 +105,8 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o obj-$(CONFIG_NETFILTER_XT_MATCH_TIME) += xt_time.o obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o +# ipset +obj-$(CONFIG_IP_SET) += ipset/ + # IPVS obj-$(CONFIG_IP_VS) += ipvs/ diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig new file mode 100644 index 0000000..5ade156 --- /dev/null +++ b/net/netfilter/ipset/Kconfig @@ -0,0 +1,26 @@ +menuconfig IP_SET + tristate "IP set support" + depends on INET && NETFILTER + help + This option adds IP set support to the kernel. + In order to define and use the sets, you need the userspace utility + ipset(8). You can use the sets in netfilter via the "set" match + and "SET" target. + + To compile it as a module, choose M here. If unsure, say N. 
+ +if IP_SET + +config IP_SET_MAX + int "Maximum number of IP sets" + default 256 + range 2 65534 + depends on IP_SET + help + You can define here default value of the maximum number + of IP sets for the kernel. + + The value can be overriden by the 'max_sets' module + parameter of the 'ip_set' module. + +endif # IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile new file mode 100644 index 0000000..910cd42 --- /dev/null +++ b/net/netfilter/ipset/Makefile @@ -0,0 +1,8 @@ +# +# Makefile for the ipset modules +# + +ip_set-y := ip_set_core.o ip_set_getport.o pfxlen.o + +# ipset core +obj-$(CONFIG_IP_SET) += ip_set.o diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c new file mode 100644 index 0000000..8a73624 --- /dev/null +++ b/net/netfilter/ipset/ip_set_core.c @@ -0,0 +1,1662 @@ +/* Copyright (C) 2000-2002 Joakim Axelsson + * Patrick Schaaf + * Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +/* Kernel module for IP set management */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static LIST_HEAD(ip_set_type_list); /* all registered set types */ +static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */ + +static struct ip_set **ip_set_list; /* all individual sets */ +static ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */ + +#define STREQ(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0) + +static unsigned int max_sets; + +module_param(max_sets, int, 0600); +MODULE_PARM_DESC(max_sets, "maximal number of sets"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("core IP set support"); +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); + +/* + * The set types are implemented in modules and registered set types + * can be found in ip_set_type_list. Adding/deleting types is + * serialized by ip_set_type_mutex. + */ + +static inline void +ip_set_type_lock(void) +{ + mutex_lock(&ip_set_type_mutex); +} + +static inline void +ip_set_type_unlock(void) +{ + mutex_unlock(&ip_set_type_mutex); +} + +/* Register and deregister settype */ + +static struct ip_set_type * +find_set_type(const char *name, u8 family, u8 revision) +{ + struct ip_set_type *type; + + list_for_each_entry_rcu(type, &ip_set_type_list, list) + if (STREQ(type->name, name) && + (type->family == family || type->family == AF_UNSPEC) && + type->revision == revision) + return type; + return NULL; +} + +/* Unlock, try to load a set type module and lock again */ +static int +try_to_load_type(const char *name) +{ + nfnl_unlock(); + pr_debug("try to load ip_set_%s\n", name); + if (request_module("ip_set_%s", name) < 0) { + pr_warning("Can't find ip_set type %s\n", name); + nfnl_lock(); + return -IPSET_ERR_FIND_TYPE; + } + nfnl_lock(); + return -EAGAIN; +} + +/* Find a set type and reference it */ +static int +find_set_type_get(const char *name, u8 family, 
u8 revision, + struct ip_set_type **found) +{ + rcu_read_lock(); + *found = find_set_type(name, family, revision); + if (*found) { + int err = !try_module_get((*found)->me); + rcu_read_unlock(); + return err ? -EFAULT : 0; + } + rcu_read_unlock(); + + return try_to_load_type(name); +} + +/* Find a given set type by name and family. + * If we succeeded, the supported minimal and maximum revisions are + * filled out. *min starts at 255 (the u8 maximum) so any match lowers it. + */ +static int +find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max) +{ + struct ip_set_type *type; + bool found = false; + + *min = 255; *max = 0; + rcu_read_lock(); + list_for_each_entry_rcu(type, &ip_set_type_list, list) + if (STREQ(type->name, name) && + (type->family == family || type->family == AF_UNSPEC)) { + found = true; + if (type->revision < *min) + *min = type->revision; + if (type->revision > *max) + *max = type->revision; + } + rcu_read_unlock(); + if (found) + return 0; + + return try_to_load_type(name); +} + +#define family_name(f) ((f) == AF_INET ? "inet" : \ + (f) == AF_INET6 ? "inet6" : "any") + +/* Register a set type structure. The type is identified by + * the unique triple of name, family and revision. + */ +int +ip_set_type_register(struct ip_set_type *type) +{ + int ret = 0; + + if (type->protocol != IPSET_PROTOCOL) { + pr_warning("ip_set type %s, family %s, revision %u uses " + "wrong protocol version %u (want %u)\n", + type->name, family_name(type->family), + type->revision, type->protocol, IPSET_PROTOCOL); + return -EINVAL; + } + + ip_set_type_lock(); + if (find_set_type(type->name, type->family, type->revision)) { + /* Duplicate! 
*/ + pr_warning("ip_set type %s, family %s, revision %u " + "already registered!\n", type->name, + family_name(type->family), type->revision); + ret = -EINVAL; + goto unlock; + } + list_add_rcu(&type->list, &ip_set_type_list); + pr_debug("type %s, family %s, revision %u registered.\n", + type->name, family_name(type->family), type->revision); +unlock: + ip_set_type_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(ip_set_type_register); + +/* Unregister a set type. There's a small race with ip_set_create */ +void +ip_set_type_unregister(struct ip_set_type *type) +{ + ip_set_type_lock(); + if (!find_set_type(type->name, type->family, type->revision)) { + pr_warning("ip_set type %s, family %s, revision %u " + "not registered\n", type->name, + family_name(type->family), type->revision); + goto unlock; + } + list_del_rcu(&type->list); + pr_debug("type %s, family %s, revision %u unregistered.\n", + type->name, family_name(type->family), type->revision); +unlock: + ip_set_type_unlock(); + + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(ip_set_type_unregister); + +/* Utility functions */ +void * +ip_set_alloc(size_t size) +{ + void *members = NULL; + + if (size < KMALLOC_MAX_SIZE) + members = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); + + if (members) { + pr_debug("%p: allocated with kmalloc\n", members); + return members; + } + + members = vzalloc(size); + if (!members) + return NULL; + pr_debug("%p: allocated with vmalloc\n", members); + + return members; +} +EXPORT_SYMBOL_GPL(ip_set_alloc); + +void +ip_set_free(void *members) +{ + pr_debug("%p: free with %s\n", members, + is_vmalloc_addr(members) ? 
"vfree" : "kfree"); + if (is_vmalloc_addr(members)) + vfree(members); + else + kfree(members); +} +EXPORT_SYMBOL_GPL(ip_set_free); + +static inline bool +flag_nested(const struct nlattr *nla) +{ + return nla->nla_type & NLA_F_NESTED; +} + +static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = { + [IPSET_ATTR_IPADDR_IPV4] = { .type = NLA_U32 }, + [IPSET_ATTR_IPADDR_IPV6] = { .type = NLA_BINARY, + .len = sizeof(struct in6_addr) }, +}; + +int +ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr) +{ + struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1]; + + if (unlikely(!flag_nested(nla))) + return -IPSET_ERR_PROTOCOL; + if (nla_parse(tb, IPSET_ATTR_IPADDR_MAX, nla_data(nla), nla_len(nla), + ipaddr_policy)) + return -IPSET_ERR_PROTOCOL; + if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4))) + return -IPSET_ERR_PROTOCOL; + + *ipaddr = nla_get_be32(tb[IPSET_ATTR_IPADDR_IPV4]); + return 0; +} +EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4); + +int +ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) +{ + struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1]; + + if (unlikely(!flag_nested(nla))) + return -IPSET_ERR_PROTOCOL; + + if (nla_parse(tb, IPSET_ATTR_IPADDR_MAX, nla_data(nla), nla_len(nla), + ipaddr_policy)) + return -IPSET_ERR_PROTOCOL; + if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6))) + return -IPSET_ERR_PROTOCOL; + + memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]), + sizeof(struct in6_addr)); + return 0; +} +EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); + +/* + * Creating/destroying/renaming/swapping affect the existence and + * the properties of a set. All of these can be executed from userspace + * only and serialized by the nfnl mutex indirectly from nfnetlink. + * + * Sets are identified by their index in ip_set_list and the index + * is used by the external references (set/SET netfilter modules). + * + * The set behind an index may change by swapping only, from userspace. 
+ */ + +static inline void +__ip_set_get(ip_set_id_t index) +{ + atomic_inc(&ip_set_list[index]->ref); +} + +static inline void +__ip_set_put(ip_set_id_t index) +{ + atomic_dec(&ip_set_list[index]->ref); +} + +/* + * Add, del and test set entries from kernel. + * + * The set behind the index must exist and must be referenced + * so it can't be destroyed (or changed) under our foot. + */ + +int +ip_set_test(ip_set_id_t index, const struct sk_buff *skb, + u8 family, u8 dim, u8 flags) +{ + struct ip_set *set = ip_set_list[index]; + int ret = 0; + + BUG_ON(set == NULL || atomic_read(&set->ref) == 0); + pr_debug("set %s, index %u\n", set->name, index); + + if (dim < set->type->dimension || + !(family == set->family || set->family == AF_UNSPEC)) + return 0; + + read_lock_bh(&set->lock); + ret = set->variant->kadt(set, skb, IPSET_TEST, family, dim, flags); + read_unlock_bh(&set->lock); + + if (ret == -EAGAIN) { + /* Type requests element to be completed */ + pr_debug("element must be competed, ADD is triggered\n"); + write_lock_bh(&set->lock); + set->variant->kadt(set, skb, IPSET_ADD, family, dim, flags); + write_unlock_bh(&set->lock); + ret = 1; + } + + /* Convert error codes to nomatch */ + return (ret < 0 ? 
0 : ret); +} +EXPORT_SYMBOL_GPL(ip_set_test); + +int +ip_set_add(ip_set_id_t index, const struct sk_buff *skb, + u8 family, u8 dim, u8 flags) +{ + struct ip_set *set = ip_set_list[index]; + int ret; + + BUG_ON(set == NULL || atomic_read(&set->ref) == 0); + pr_debug("set %s, index %u\n", set->name, index); + + if (dim < set->type->dimension || + !(family == set->family || set->family == AF_UNSPEC)) + return 0; + + write_lock_bh(&set->lock); + ret = set->variant->kadt(set, skb, IPSET_ADD, family, dim, flags); + write_unlock_bh(&set->lock); + + return ret; +} +EXPORT_SYMBOL_GPL(ip_set_add); + +int +ip_set_del(ip_set_id_t index, const struct sk_buff *skb, + u8 family, u8 dim, u8 flags) +{ + struct ip_set *set = ip_set_list[index]; + int ret = 0; + + BUG_ON(set == NULL || atomic_read(&set->ref) == 0); + pr_debug("set %s, index %u\n", set->name, index); + + if (dim < set->type->dimension || + !(family == set->family || set->family == AF_UNSPEC)) + return 0; + + write_lock_bh(&set->lock); + ret = set->variant->kadt(set, skb, IPSET_DEL, family, dim, flags); + write_unlock_bh(&set->lock); + + return ret; +} +EXPORT_SYMBOL_GPL(ip_set_del); + +/* + * Find set by name, reference it once. The reference makes sure the + * thing pointed to, does not go away under our feet. + * + * The nfnl mutex must already be activated. + */ +ip_set_id_t +ip_set_get_byname(const char *name, struct ip_set **set) +{ + ip_set_id_t i, index = IPSET_INVALID_ID; + struct ip_set *s; + + for (i = 0; i < ip_set_max; i++) { + s = ip_set_list[i]; + if (s != NULL && STREQ(s->name, name)) { + __ip_set_get(i); + index = i; + *set = s; + } + } + + return index; +} +EXPORT_SYMBOL_GPL(ip_set_get_byname); + +/* + * If the given set pointer points to a valid set, decrement + * reference count by 1. The caller shall not assume the index + * to be valid, after calling this function. + * + * The nfnl mutex must already be activated. 
+ */ +void +ip_set_put_byindex(ip_set_id_t index) +{ + if (ip_set_list[index] != NULL) { + BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0); + __ip_set_put(index); + } +} +EXPORT_SYMBOL_GPL(ip_set_put_byindex); + +/* + * Get the name of a set behind a set index. + * We assume the set is referenced, so it does exist and + * can't be destroyed. The set cannot be renamed due to + * the referencing either. + * + * The nfnl mutex must already be activated. + */ +const char * +ip_set_name_byindex(ip_set_id_t index) +{ + const struct ip_set *set = ip_set_list[index]; + + BUG_ON(set == NULL); + BUG_ON(atomic_read(&set->ref) == 0); + + /* Referenced, so it's safe */ + return set->name; +} +EXPORT_SYMBOL_GPL(ip_set_name_byindex); + +/* + * Routines to call by external subsystems, which do not + * call nfnl_lock for us. + */ + +/* + * Find set by name, reference it once. The reference makes sure the + * thing pointed to, does not go away under our feet. + * + * The nfnl mutex is used in the function. + */ +ip_set_id_t +ip_set_nfnl_get(const char *name) +{ + struct ip_set *s; + ip_set_id_t index; + + nfnl_lock(); + index = ip_set_get_byname(name, &s); + nfnl_unlock(); + + return index; +} +EXPORT_SYMBOL_GPL(ip_set_nfnl_get); + +/* + * Find set by index, reference it once. The reference makes sure the + * thing pointed to, does not go away under our feet. + * + * The nfnl mutex is used in the function; IPSET_INVALID_ID is returned for an out-of-range or empty index (valid indices are 0..ip_set_max-1). + */ +ip_set_id_t +ip_set_nfnl_get_byindex(ip_set_id_t index) +{ + if (index >= ip_set_max) + return IPSET_INVALID_ID; + + nfnl_lock(); + if (ip_set_list[index]) + __ip_set_get(index); + else + index = IPSET_INVALID_ID; + nfnl_unlock(); + + return index; +} +EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex); + +/* + * If the given set pointer points to a valid set, decrement + * reference count by 1. The caller shall not assume the index + * to be valid, after calling this function. + * + * The nfnl mutex is used in the function. 
+ */ +void +ip_set_nfnl_put(ip_set_id_t index) +{ + nfnl_lock(); + if (ip_set_list[index] != NULL) { + BUG_ON(atomic_read(&ip_set_list[index]->ref) == 0); + __ip_set_put(index); + } + nfnl_unlock(); +} +EXPORT_SYMBOL_GPL(ip_set_nfnl_put); + +/* + * Communication protocol with userspace over netlink. + * + * We already locked by nfnl_lock. + */ + +static inline bool +protocol_failed(const struct nlattr * const tb[]) +{ + return !tb[IPSET_ATTR_PROTOCOL] || + nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL; +} + +static inline u32 +flag_exist(const struct nlmsghdr *nlh) +{ + return nlh->nlmsg_flags & NLM_F_EXCL ? 0 : IPSET_FLAG_EXIST; +} + +static struct nlmsghdr * +start_msg(struct sk_buff *skb, u32 pid, u32 seq, unsigned int flags, + enum ipset_cmd cmd) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + + nlh = nlmsg_put(skb, pid, seq, cmd | (NFNL_SUBSYS_IPSET << 8), + sizeof(*nfmsg), flags); + if (nlh == NULL) + return NULL; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = AF_INET; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + return nlh; +} + +/* Create a set */ + +static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = { + [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, + [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, + .len = IPSET_MAXNAMELEN - 1 }, + [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING, + .len = IPSET_MAXNAMELEN - 1}, + [IPSET_ATTR_REVISION] = { .type = NLA_U8 }, + [IPSET_ATTR_FAMILY] = { .type = NLA_U8 }, + [IPSET_ATTR_DATA] = { .type = NLA_NESTED }, +}; + +static ip_set_id_t +find_set_id(const char *name) +{ + ip_set_id_t i, index = IPSET_INVALID_ID; + const struct ip_set *set; + + for (i = 0; index == IPSET_INVALID_ID && i < ip_set_max; i++) { + set = ip_set_list[i]; + if (set != NULL && STREQ(set->name, name)) + index = i; + } + return index; +} + +static inline struct ip_set * +find_set(const char *name) +{ + ip_set_id_t index = find_set_id(name); + + return index == IPSET_INVALID_ID ? 
NULL : ip_set_list[index]; +} + +static int +find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set) +{ + ip_set_id_t i; + + *index = IPSET_INVALID_ID; + for (i = 0; i < ip_set_max; i++) { + if (ip_set_list[i] == NULL) { + if (*index == IPSET_INVALID_ID) + *index = i; + } else if (STREQ(name, ip_set_list[i]->name)) { + /* Name clash */ + *set = ip_set_list[i]; + return -EEXIST; + } + } + if (*index == IPSET_INVALID_ID) + /* No free slot remained */ + return -IPSET_ERR_MAX_SETS; + return 0; +} + +static int +ip_set_create(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + struct ip_set *set, *clash; + ip_set_id_t index = IPSET_INVALID_ID; + struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {}; + const char *name, *typename; + u8 family, revision; + u32 flags = flag_exist(nlh); + int ret = 0; + + if (unlikely(protocol_failed(attr) || + attr[IPSET_ATTR_SETNAME] == NULL || + attr[IPSET_ATTR_TYPENAME] == NULL || + attr[IPSET_ATTR_REVISION] == NULL || + attr[IPSET_ATTR_FAMILY] == NULL || + (attr[IPSET_ATTR_DATA] != NULL && + !flag_nested(attr[IPSET_ATTR_DATA])))) + return -IPSET_ERR_PROTOCOL; + + name = nla_data(attr[IPSET_ATTR_SETNAME]); + typename = nla_data(attr[IPSET_ATTR_TYPENAME]); + family = nla_get_u8(attr[IPSET_ATTR_FAMILY]); + revision = nla_get_u8(attr[IPSET_ATTR_REVISION]); + pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n", + name, typename, family_name(family), revision); + + /* + * First, and without any locks, allocate and initialize + * a normal base set structure. + */ + set = kzalloc(sizeof(struct ip_set), GFP_KERNEL); + if (!set) + return -ENOMEM; + rwlock_init(&set->lock); + strlcpy(set->name, name, IPSET_MAXNAMELEN); + atomic_set(&set->ref, 0); + set->family = family; + + /* + * Next, check that we know the type, and take + * a reference on the type, to make sure it stays available + * while constructing our new set. 
+ * + * After referencing the type, we try to create the type + * specific part of the set without holding any locks. + */ + ret = find_set_type_get(typename, family, revision, &(set->type)); + if (ret) + goto out; + + /* + * Without holding any locks, create private part. + */ + if (attr[IPSET_ATTR_DATA] && + nla_parse(tb, IPSET_ATTR_CREATE_MAX, + nla_data(attr[IPSET_ATTR_DATA]), + nla_len(attr[IPSET_ATTR_DATA]), + set->type->create_policy)) { + ret = -IPSET_ERR_PROTOCOL; + goto put_out; + } + + ret = set->type->create(set, tb, flags); + if (ret != 0) + goto put_out; + + /* BTW, ret==0 here. */ + + /* + * Here, we have a valid, constructed set and we are protected + * by nfnl_lock. Find the first free index in ip_set_list and + * check clashing. + */ + if ((ret = find_free_id(set->name, &index, &clash)) != 0) { + /* If this is the same set and requested, ignore error */ + if (ret == -EEXIST && + (flags & IPSET_FLAG_EXIST) && + STREQ(set->type->name, clash->type->name) && + set->type->family == clash->type->family && + set->type->revision == clash->type->revision && + set->variant->same_set(set, clash)) + ret = 0; + goto cleanup; + } + + /* + * Finally! Add our shiny new set to the list, and be done. 
+ */ + pr_debug("create: '%s' created with index %u!\n", set->name, index); + ip_set_list[index] = set; + + return ret; + +cleanup: + set->variant->destroy(set); +put_out: + module_put(set->type->me); +out: + kfree(set); + return ret; +} + +/* Destroy sets */ + +static const struct nla_policy +ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = { + [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, + [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, + .len = IPSET_MAXNAMELEN - 1 }, +}; + +static void +ip_set_destroy_set(ip_set_id_t index) +{ + struct ip_set *set = ip_set_list[index]; + + pr_debug("set: %s\n", set->name); + ip_set_list[index] = NULL; + + /* Must call it without holding any lock */ + set->variant->destroy(set); + module_put(set->type->me); + kfree(set); +} + +static int +ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + ip_set_id_t i; + + if (unlikely(protocol_failed(attr))) + return -IPSET_ERR_PROTOCOL; + + /* References are protected by the nfnl mutex */ + if (!attr[IPSET_ATTR_SETNAME]) { + for (i = 0; i < ip_set_max; i++) { + if (ip_set_list[i] != NULL && + (atomic_read(&ip_set_list[i]->ref))) + return -IPSET_ERR_BUSY; + } + for (i = 0; i < ip_set_max; i++) { + if (ip_set_list[i] != NULL) + ip_set_destroy_set(i); + } + } else { + i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); + if (i == IPSET_INVALID_ID) + return -ENOENT; + else if (atomic_read(&ip_set_list[i]->ref)) + return -IPSET_ERR_BUSY; + + ip_set_destroy_set(i); + } + return 0; +} + +/* Flush sets: with no set name every existing set is flushed, otherwise only the named one; protocol failures use the same ipset error code as the other commands */ + +static void +ip_set_flush_set(struct ip_set *set) +{ + pr_debug("set: %s\n", set->name); + + write_lock_bh(&set->lock); + set->variant->flush(set); + write_unlock_bh(&set->lock); +} + +static int +ip_set_flush(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + ip_set_id_t i; + + if (unlikely(protocol_failed(attr))) + return -IPSET_ERR_PROTOCOL; + + if 
(!attr[IPSET_ATTR_SETNAME]) { + for (i = 0; i < ip_set_max; i++) + if (ip_set_list[i] != NULL) + ip_set_flush_set(ip_set_list[i]); + } else { + i = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); + if (i == IPSET_INVALID_ID) + return -ENOENT; + + ip_set_flush_set(ip_set_list[i]); + } + + return 0; +} + +/* Rename a set */ + +static const struct nla_policy +ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = { + [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, + [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, + .len = IPSET_MAXNAMELEN - 1 }, + [IPSET_ATTR_SETNAME2] = { .type = NLA_NUL_STRING, + .len = IPSET_MAXNAMELEN - 1 }, +}; + +static int +ip_set_rename(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + struct ip_set *set; + const char *name2; + ip_set_id_t i; + + if (unlikely(protocol_failed(attr) || + attr[IPSET_ATTR_SETNAME] == NULL || + attr[IPSET_ATTR_SETNAME2] == NULL)) + return -IPSET_ERR_PROTOCOL; + + set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + if (set == NULL) + return -ENOENT; + if (atomic_read(&set->ref) != 0) + return -IPSET_ERR_REFERENCED; + + name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); + for (i = 0; i < ip_set_max; i++) { + if (ip_set_list[i] != NULL && + STREQ(ip_set_list[i]->name, name2)) + return -IPSET_ERR_EXIST_SETNAME2; + } + strncpy(set->name, name2, IPSET_MAXNAMELEN); + + return 0; +} + +/* Swap two sets so that name/index points to the other. + * References and set names are also swapped. + * + * We are protected by the nfnl mutex and references are + * manipulated only by holding the mutex. The kernel interfaces + * do not hold the mutex but the pointer settings are atomic + * so the ip_set_list always contains valid pointers to the sets. 
+ */ + +static int +ip_set_swap(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + struct ip_set *from, *to; + ip_set_id_t from_id, to_id; + char from_name[IPSET_MAXNAMELEN]; + u32 from_ref; + + if (unlikely(protocol_failed(attr) || + attr[IPSET_ATTR_SETNAME] == NULL || + attr[IPSET_ATTR_SETNAME2] == NULL)) + return -IPSET_ERR_PROTOCOL; + + from_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); + if (from_id == IPSET_INVALID_ID) + return -ENOENT; + + to_id = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME2])); + if (to_id == IPSET_INVALID_ID) + return -IPSET_ERR_EXIST_SETNAME2; + + from = ip_set_list[from_id]; + to = ip_set_list[to_id]; + + /* Features must not change. + * Not an artifical restriction anymore, as we must prevent + * possible loops created by swapping in setlist type of sets. */ + if (!(from->type->features == to->type->features && + from->type->family == to->type->family)) + return -IPSET_ERR_TYPE_MISMATCH; + + /* No magic here: ref munging protected by the nfnl_lock */ + strncpy(from_name, from->name, IPSET_MAXNAMELEN); + from_ref = atomic_read(&from->ref); + + strncpy(from->name, to->name, IPSET_MAXNAMELEN); + atomic_set(&from->ref, atomic_read(&to->ref)); + strncpy(to->name, from_name, IPSET_MAXNAMELEN); + atomic_set(&to->ref, from_ref); + + ip_set_list[from_id] = to; + ip_set_list[to_id] = from; + + return 0; +} + +/* List/save set data */ + +#define DUMP_INIT 0L +#define DUMP_ALL 1L +#define DUMP_ONE 2L +#define DUMP_LAST 3L + +static int +ip_set_dump_done(struct netlink_callback *cb) +{ + if (cb->args[2]) { + pr_debug("release set %s\n", ip_set_list[cb->args[1]]->name); + __ip_set_put((ip_set_id_t) cb->args[1]); + } + return 0; +} + +static inline void +dump_attrs(struct nlmsghdr *nlh) +{ + const struct nlattr *attr; + int rem; + + pr_debug("dump nlmsg\n"); + nlmsg_for_each_attr(attr, nlh, sizeof(struct nfgenmsg), rem) { + pr_debug("type: %u, len %u\n", nla_type(attr), 
attr->nla_len); + } +} + +static int +dump_init(struct netlink_callback *cb) +{ + struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); + int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); + struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; + struct nlattr *attr = (void *)nlh + min_len; + ip_set_id_t index; + + /* Second pass, so parser can't fail */ + nla_parse(cda, IPSET_ATTR_CMD_MAX, + attr, nlh->nlmsg_len - min_len, ip_set_setname_policy); + + /* cb->args[0] : dump single set/all sets + * [1] : set index + * [..]: type specific + */ + + if (!cda[IPSET_ATTR_SETNAME]) { + cb->args[0] = DUMP_ALL; + return 0; + } + + index = find_set_id(nla_data(cda[IPSET_ATTR_SETNAME])); + if (index == IPSET_INVALID_ID) + return -ENOENT; + + cb->args[0] = DUMP_ONE; + cb->args[1] = index; + return 0; +} + +static int +ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) +{ + ip_set_id_t index = IPSET_INVALID_ID, max; + struct ip_set *set = NULL; + struct nlmsghdr *nlh = NULL; + unsigned int flags = NETLINK_CB(cb->skb).pid ? NLM_F_MULTI : 0; + int ret = 0; + + if (cb->args[0] == DUMP_INIT) { + ret = dump_init(cb); + if (ret < 0) { + nlh = nlmsg_hdr(cb->skb); + /* We have to create and send the error message + * manually :-( */ + if (nlh->nlmsg_flags & NLM_F_ACK) + netlink_ack(cb->skb, nlh, ret); + return ret; + } + } + + if (cb->args[1] >= ip_set_max) + goto out; + + pr_debug("args[0]: %ld args[1]: %ld\n", cb->args[0], cb->args[1]); + max = cb->args[0] == DUMP_ONE ? cb->args[1] + 1 : ip_set_max; + for (; cb->args[1] < max; cb->args[1]++) { + index = (ip_set_id_t) cb->args[1]; + set = ip_set_list[index]; + if (set == NULL) { + if (cb->args[0] == DUMP_ONE) { + ret = -ENOENT; + goto out; + } + continue; + } + /* When dumping all sets, we must dump "sorted" + * so that lists (unions of sets) are dumped last. 
+ */ + if (cb->args[0] != DUMP_ONE && + !((cb->args[0] == DUMP_ALL) ^ + (set->type->features & IPSET_DUMP_LAST))) + continue; + pr_debug("List set: %s\n", set->name); + if (!cb->args[2]) { + /* Start listing: make sure set won't be destroyed */ + pr_debug("reference set\n"); + __ip_set_get(index); + } + nlh = start_msg(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, flags, + IPSET_CMD_LIST); + if (!nlh) { + ret = -EMSGSIZE; + goto release_refcount; + } + NLA_PUT_U8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL); + NLA_PUT_STRING(skb, IPSET_ATTR_SETNAME, set->name); + switch (cb->args[2]) { + case 0: + /* Core header data */ + NLA_PUT_STRING(skb, IPSET_ATTR_TYPENAME, + set->type->name); + NLA_PUT_U8(skb, IPSET_ATTR_FAMILY, + set->family); + NLA_PUT_U8(skb, IPSET_ATTR_REVISION, + set->type->revision); + ret = set->variant->head(set, skb); + if (ret < 0) + goto release_refcount; + /* Fall through and add elements */ + default: + read_lock_bh(&set->lock); + ret = set->variant->list(set, skb, cb); + read_unlock_bh(&set->lock); + if (!cb->args[2]) { + /* Set is done, proceed with next one */ + if (cb->args[0] == DUMP_ONE) + cb->args[1] = IPSET_INVALID_ID; + else + cb->args[1]++; + } + goto release_refcount; + } + } + goto out; + +nla_put_failure: + ret = -EFAULT; +release_refcount: + /* If there was an error or set is done, release set */ + if (ret || !cb->args[2]) { + pr_debug("release set %s\n", ip_set_list[index]->name); + __ip_set_put(index); + } + + /* If we dump all sets, continue with dumping last ones */ + if (cb->args[0] == DUMP_ALL && cb->args[1] >= max && !cb->args[2]) + cb->args[0] = DUMP_LAST; + +out: + if (nlh) { + nlmsg_end(skb, nlh); + pr_debug("nlmsg_len: %u\n", nlh->nlmsg_len); + dump_attrs(nlh); + } + + return ret < 0 ? 
ret : skb->len; +} + +static int +ip_set_dump(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + if (unlikely(protocol_failed(attr))) + return -IPSET_ERR_PROTOCOL; + + return netlink_dump_start(ctnl, skb, nlh, + ip_set_dump_start, + ip_set_dump_done); +} + +/* Add, del and test */ + +static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = { + [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, + [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, + .len = IPSET_MAXNAMELEN - 1 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + [IPSET_ATTR_DATA] = { .type = NLA_NESTED }, + [IPSET_ATTR_ADT] = { .type = NLA_NESTED }, +}; + +static int +call_ad(struct sk_buff *skb, struct ip_set *set, + struct nlattr *tb[], enum ipset_adt adt, + u32 flags, bool use_lineno) +{ + int ret, retried = 0; + u32 lineno = 0; + bool eexist = flags & IPSET_FLAG_EXIST; + + do { + write_lock_bh(&set->lock); + ret = set->variant->uadt(set, tb, adt, &lineno, flags); + write_unlock_bh(&set->lock); + } while (ret == -EAGAIN && + set->variant->resize && + (ret = set->variant->resize(set, retried++)) == 0); + + if (!ret || (ret == -IPSET_ERR_EXIST && eexist)) + return 0; + if (lineno && use_lineno) { + /* Error in restore/batch mode: send back lineno */ + struct nlmsghdr *nlh = nlmsg_hdr(skb); + int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); + struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; + struct nlattr *cmdattr = (void *)nlh + min_len; + u32 *errline; + + nla_parse(cda, IPSET_ATTR_CMD_MAX, + cmdattr, nlh->nlmsg_len - min_len, + ip_set_adt_policy); + + errline = nla_data(cda[IPSET_ATTR_LINENO]); + + *errline = lineno; + } + + return ret; +} + +static int +ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + struct ip_set *set; + struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; + const struct nlattr *nla; + u32 flags = flag_exist(nlh); + bool use_lineno; + int ret = 
0; + + if (unlikely(protocol_failed(attr) || + attr[IPSET_ATTR_SETNAME] == NULL || + !((attr[IPSET_ATTR_DATA] != NULL) ^ + (attr[IPSET_ATTR_ADT] != NULL)) || + (attr[IPSET_ATTR_DATA] != NULL && + !flag_nested(attr[IPSET_ATTR_DATA])) || + (attr[IPSET_ATTR_ADT] != NULL && + (!flag_nested(attr[IPSET_ATTR_ADT]) || + attr[IPSET_ATTR_LINENO] == NULL)))) + return -IPSET_ERR_PROTOCOL; + + set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + if (set == NULL) + return -ENOENT; + + use_lineno = !!attr[IPSET_ATTR_LINENO]; + if (attr[IPSET_ATTR_DATA]) { + if (nla_parse(tb, IPSET_ATTR_ADT_MAX, + nla_data(attr[IPSET_ATTR_DATA]), + nla_len(attr[IPSET_ATTR_DATA]), + set->type->adt_policy)) + return -IPSET_ERR_PROTOCOL; + ret = call_ad(skb, set, tb, IPSET_ADD, flags, use_lineno); + } else { + int nla_rem; + + nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) { + memset(tb, 0, sizeof(tb)); + if (nla_type(nla) != IPSET_ATTR_DATA || + !flag_nested(nla) || + nla_parse(tb, IPSET_ATTR_ADT_MAX, + nla_data(nla), nla_len(nla), + set->type->adt_policy)) + return -IPSET_ERR_PROTOCOL; + ret = call_ad(skb, set, tb, IPSET_ADD, + flags, use_lineno); + if (ret < 0) + return ret; + } + } + return ret; +} + +static int +ip_set_udel(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + struct ip_set *set; + struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; + const struct nlattr *nla; + u32 flags = flag_exist(nlh); + bool use_lineno; + int ret = 0; + + if (unlikely(protocol_failed(attr) || + attr[IPSET_ATTR_SETNAME] == NULL || + !((attr[IPSET_ATTR_DATA] != NULL) ^ + (attr[IPSET_ATTR_ADT] != NULL)) || + (attr[IPSET_ATTR_DATA] != NULL && + !flag_nested(attr[IPSET_ATTR_DATA])) || + (attr[IPSET_ATTR_ADT] != NULL && + (!flag_nested(attr[IPSET_ATTR_ADT]) || + attr[IPSET_ATTR_LINENO] == NULL)))) + return -IPSET_ERR_PROTOCOL; + + set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + if (set == NULL) + return -ENOENT; + + use_lineno = 
!!attr[IPSET_ATTR_LINENO];
+	if (attr[IPSET_ATTR_DATA]) {
+		/* Single element: the attributes are nested directly in DATA */
+		if (nla_parse(tb, IPSET_ATTR_ADT_MAX,
+			      nla_data(attr[IPSET_ATTR_DATA]),
+			      nla_len(attr[IPSET_ATTR_DATA]),
+			      set->type->adt_policy))
+			return -IPSET_ERR_PROTOCOL;
+		ret = call_ad(skb, set, tb, IPSET_DEL, flags, use_lineno);
+	} else {
+		int nla_rem;
+
+		/* Batch mode: one nested DATA attribute per element */
+		nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
+			/* Clear the whole attribute table, as ip_set_uadd()
+			 * does: sizeof(*tb) would cover a single pointer slot
+			 * only, not the IPSET_ATTR_ADT_MAX+1 entries of tb.
+			 */
+			memset(tb, 0, sizeof(tb));
+			if (nla_type(nla) != IPSET_ATTR_DATA ||
+			    !flag_nested(nla) ||
+			    nla_parse(tb, IPSET_ATTR_ADT_MAX,
+				      nla_data(nla), nla_len(nla),
+				      set->type->adt_policy))
+				return -IPSET_ERR_PROTOCOL;
+			ret = call_ad(skb, set, tb, IPSET_DEL,
+				      flags, use_lineno);
+			/* Stop at the first element that fails */
+			if (ret < 0)
+				return ret;
+		}
+	}
+	return ret;
+}
+
+static int
+ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
+	     const struct nlmsghdr *nlh,
+	     const struct nlattr * const attr[])
+{
+	struct ip_set *set;
+	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
+	int ret = 0;
+
+	if (unlikely(protocol_failed(attr) ||
+		     attr[IPSET_ATTR_SETNAME] == NULL ||
+		     attr[IPSET_ATTR_DATA] == NULL ||
+		     !flag_nested(attr[IPSET_ATTR_DATA])))
+		return -IPSET_ERR_PROTOCOL;
+
+	set = find_set(nla_data(attr[IPSET_ATTR_SETNAME]));
+	if (set == NULL)
+		return -ENOENT;
+
+	if (nla_parse(tb, IPSET_ATTR_ADT_MAX,
+		      nla_data(attr[IPSET_ATTR_DATA]),
+		      nla_len(attr[IPSET_ATTR_DATA]),
+		      set->type->adt_policy))
+		return -IPSET_ERR_PROTOCOL;
+
+	read_lock_bh(&set->lock);
+	ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0);
+	read_unlock_bh(&set->lock);
+	/* Userspace can't trigger element to be re-added */
+	if (ret == -EAGAIN)
+		ret = 1;
+
+	return ret < 0 ? ret : ret > 0 ?
0 : -IPSET_ERR_EXIST; +} + +/* Get headed data of a set */ + +static int +ip_set_header(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + const struct ip_set *set; + struct sk_buff *skb2; + struct nlmsghdr *nlh2; + ip_set_id_t index; + int ret = 0; + + if (unlikely(protocol_failed(attr) || + attr[IPSET_ATTR_SETNAME] == NULL)) + return -IPSET_ERR_PROTOCOL; + + index = find_set_id(nla_data(attr[IPSET_ATTR_SETNAME])); + if (index == IPSET_INVALID_ID) + return -ENOENT; + set = ip_set_list[index]; + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) + return -ENOMEM; + + nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, + IPSET_CMD_HEADER); + if (!nlh2) + goto nlmsg_failure; + NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL); + NLA_PUT_STRING(skb2, IPSET_ATTR_SETNAME, set->name); + NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, set->type->name); + NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, set->family); + NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, set->type->revision); + nlmsg_end(skb2, nlh2); + + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + if (ret < 0) + return ret; + + return 0; + +nla_put_failure: + nlmsg_cancel(skb2, nlh2); +nlmsg_failure: + kfree_skb(skb2); + return -EMSGSIZE; +} + +/* Get type data */ + +static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = { + [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, + [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING, + .len = IPSET_MAXNAMELEN - 1 }, + [IPSET_ATTR_FAMILY] = { .type = NLA_U8 }, +}; + +static int +ip_set_type(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + struct sk_buff *skb2; + struct nlmsghdr *nlh2; + u8 family, min, max; + const char *typename; + int ret = 0; + + if (unlikely(protocol_failed(attr) || + attr[IPSET_ATTR_TYPENAME] == NULL || + attr[IPSET_ATTR_FAMILY] == NULL)) + return -IPSET_ERR_PROTOCOL; + + 
family = nla_get_u8(attr[IPSET_ATTR_FAMILY]); + typename = nla_data(attr[IPSET_ATTR_TYPENAME]); + ret = find_set_type_minmax(typename, family, &min, &max); + if (ret) + return ret; + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) + return -ENOMEM; + + nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, + IPSET_CMD_TYPE); + if (!nlh2) + goto nlmsg_failure; + NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL); + NLA_PUT_STRING(skb2, IPSET_ATTR_TYPENAME, typename); + NLA_PUT_U8(skb2, IPSET_ATTR_FAMILY, family); + NLA_PUT_U8(skb2, IPSET_ATTR_REVISION, max); + NLA_PUT_U8(skb2, IPSET_ATTR_REVISION_MIN, min); + nlmsg_end(skb2, nlh2); + + pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len); + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + if (ret < 0) + return ret; + + return 0; + +nla_put_failure: + nlmsg_cancel(skb2, nlh2); +nlmsg_failure: + kfree_skb(skb2); + return -EMSGSIZE; +} + +/* Get protocol version */ + +static const struct nla_policy +ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = { + [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, +}; + +static int +ip_set_protocol(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const attr[]) +{ + struct sk_buff *skb2; + struct nlmsghdr *nlh2; + int ret = 0; + + if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL)) + return -IPSET_ERR_PROTOCOL; + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) + return -ENOMEM; + + nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, + IPSET_CMD_PROTOCOL); + if (!nlh2) + goto nlmsg_failure; + NLA_PUT_U8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL); + nlmsg_end(skb2, nlh2); + + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + if (ret < 0) + return ret; + + return 0; + +nla_put_failure: + nlmsg_cancel(skb2, nlh2); +nlmsg_failure: + kfree_skb(skb2); + return -EMSGSIZE; +} + +static const struct nfnl_callback 
ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = { + [IPSET_CMD_CREATE] = { + .call = ip_set_create, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_create_policy, + }, + [IPSET_CMD_DESTROY] = { + .call = ip_set_destroy, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_setname_policy, + }, + [IPSET_CMD_FLUSH] = { + .call = ip_set_flush, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_setname_policy, + }, + [IPSET_CMD_RENAME] = { + .call = ip_set_rename, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_setname2_policy, + }, + [IPSET_CMD_SWAP] = { + .call = ip_set_swap, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_setname2_policy, + }, + [IPSET_CMD_LIST] = { + .call = ip_set_dump, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_setname_policy, + }, + [IPSET_CMD_SAVE] = { + .call = ip_set_dump, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_setname_policy, + }, + [IPSET_CMD_ADD] = { + .call = ip_set_uadd, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_adt_policy, + }, + [IPSET_CMD_DEL] = { + .call = ip_set_udel, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_adt_policy, + }, + [IPSET_CMD_TEST] = { + .call = ip_set_utest, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_adt_policy, + }, + [IPSET_CMD_HEADER] = { + .call = ip_set_header, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_setname_policy, + }, + [IPSET_CMD_TYPE] = { + .call = ip_set_type, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_type_policy, + }, + [IPSET_CMD_PROTOCOL] = { + .call = ip_set_protocol, + .attr_count = IPSET_ATTR_CMD_MAX, + .policy = ip_set_protocol_policy, + }, +}; + +static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = { + .name = "ip_set", + .subsys_id = NFNL_SUBSYS_IPSET, + .cb_count = IPSET_MSG_MAX, + .cb = ip_set_netlink_subsys_cb, +}; + +/* Interface to iptables/ip6tables */ + +static int +ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) +{ + unsigned *op; + 
void *data; + int copylen = *len, ret = 0; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (optval != SO_IP_SET) + return -EBADF; + if (*len < sizeof(unsigned)) + return -EINVAL; + + data = vmalloc(*len); + if (!data) + return -ENOMEM; + if (copy_from_user(data, user, *len) != 0) { + ret = -EFAULT; + goto done; + } + op = (unsigned *) data; + + if (*op < IP_SET_OP_VERSION) { + /* Check the version at the beginning of operations */ + struct ip_set_req_version *req_version = data; + if (req_version->version != IPSET_PROTOCOL) { + ret = -EPROTO; + goto done; + } + } + + switch (*op) { + case IP_SET_OP_VERSION: { + struct ip_set_req_version *req_version = data; + + if (*len != sizeof(struct ip_set_req_version)) { + ret = -EINVAL; + goto done; + } + + req_version->version = IPSET_PROTOCOL; + ret = copy_to_user(user, req_version, + sizeof(struct ip_set_req_version)); + goto done; + } + case IP_SET_OP_GET_BYNAME: { + struct ip_set_req_get_set *req_get = data; + + if (*len != sizeof(struct ip_set_req_get_set)) { + ret = -EINVAL; + goto done; + } + req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; + nfnl_lock(); + req_get->set.index = find_set_id(req_get->set.name); + nfnl_unlock(); + goto copy; + } + case IP_SET_OP_GET_BYINDEX: { + struct ip_set_req_get_set *req_get = data; + + if (*len != sizeof(struct ip_set_req_get_set) || + req_get->set.index >= ip_set_max) { + ret = -EINVAL; + goto done; + } + nfnl_lock(); + strncpy(req_get->set.name, + ip_set_list[req_get->set.index] + ? 
ip_set_list[req_get->set.index]->name : "", + IPSET_MAXNAMELEN); + nfnl_unlock(); + goto copy; + } + default: + ret = -EBADMSG; + goto done; + } /* end of switch(op) */ + +copy: + ret = copy_to_user(user, data, copylen); + +done: + vfree(data); + if (ret > 0) + ret = 0; + return ret; +} + +static struct nf_sockopt_ops so_set __read_mostly = { + .pf = PF_INET, + .get_optmin = SO_IP_SET, + .get_optmax = SO_IP_SET + 1, + .get = &ip_set_sockfn_get, + .owner = THIS_MODULE, +}; + +static int __init +ip_set_init(void) +{ + int ret; + + if (max_sets) + ip_set_max = max_sets; + if (ip_set_max >= IPSET_INVALID_ID) + ip_set_max = IPSET_INVALID_ID - 1; + + ip_set_list = kzalloc(sizeof(struct ip_set *) * ip_set_max, + GFP_KERNEL); + if (!ip_set_list) { + pr_err("ip_set: Unable to create ip_set_list\n"); + return -ENOMEM; + } + + ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); + if (ret != 0) { + pr_err("ip_set: cannot register with nfnetlink.\n"); + kfree(ip_set_list); + return ret; + } + ret = nf_register_sockopt(&so_set); + if (ret != 0) { + pr_err("SO_SET registry failed: %d\n", ret); + nfnetlink_subsys_unregister(&ip_set_netlink_subsys); + kfree(ip_set_list); + return ret; + } + + pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL); + return 0; +} + +static void __exit +ip_set_fini(void) +{ + /* There can't be any existing set */ + nf_unregister_sockopt(&so_set); + nfnetlink_subsys_unregister(&ip_set_netlink_subsys); + kfree(ip_set_list); + pr_debug("these are the famous last words\n"); +} + +module_init(ip_set_init); +module_exit(ip_set_fini); diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c new file mode 100644 index 0000000..76737bb --- /dev/null +++ b/net/netfilter/ipset/ip_set_getport.c @@ -0,0 +1,136 @@ +/* Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free 
Software Foundation. + */ + +/* Get Layer-4 data from the packets */ + +#include +#include +#include +#include +#include +#include + +#include + +/* We must handle non-linear skbs */ +static bool +get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, + bool src, __be16 *port, u8 *proto) +{ + switch (protocol) { + case IPPROTO_TCP: { + struct tcphdr _tcph; + const struct tcphdr *th; + + th = skb_header_pointer(skb, protooff, sizeof(_tcph), &_tcph); + if (th == NULL) + /* No choice either */ + return false; + + *port = src ? th->source : th->dest; + break; + } + case IPPROTO_UDP: { + struct udphdr _udph; + const struct udphdr *uh; + + uh = skb_header_pointer(skb, protooff, sizeof(_udph), &_udph); + if (uh == NULL) + /* No choice either */ + return false; + + *port = src ? uh->source : uh->dest; + break; + } + case IPPROTO_ICMP: { + struct icmphdr _ich; + const struct icmphdr *ic; + + ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich); + if (ic == NULL) + return false; + + *port = (__force __be16)htons((ic->type << 8) | ic->code); + break; + } + case IPPROTO_ICMPV6: { + struct icmp6hdr _ich; + const struct icmp6hdr *ic; + + ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich); + if (ic == NULL) + return false; + + *port = (__force __be16) + htons((ic->icmp6_type << 8) | ic->icmp6_code); + break; + } + default: + break; + } + *proto = protocol; + + return true; +} + +bool +ip_set_get_ip4_port(const struct sk_buff *skb, bool src, + __be16 *port, u8 *proto) +{ + const struct iphdr *iph = ip_hdr(skb); + unsigned int protooff = ip_hdrlen(skb); + int protocol = iph->protocol; + + /* See comments at tcp_match in ip_tables.c */ + if (protocol <= 0 || (ntohs(iph->frag_off) & IP_OFFSET)) + return false; + + return get_port(skb, protocol, protooff, src, port, proto); +} +EXPORT_SYMBOL_GPL(ip_set_get_ip4_port); + +bool +ip_set_get_ip6_port(const struct sk_buff *skb, bool src, + __be16 *port, u8 *proto) +{ + unsigned int protooff = 0; + int 
protocol; + unsigned short fragoff; + + protocol = ipv6_find_hdr(skb, &protooff, -1, &fragoff); + if (protocol <= 0 || fragoff) + return false; + + return get_port(skb, protocol, protooff, src, port, proto); +} +EXPORT_SYMBOL_GPL(ip_set_get_ip6_port); + +bool +ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port) +{ + bool ret; + u8 proto; + + switch (pf) { + case AF_INET: + ret = ip_set_get_ip4_port(skb, src, port, &proto); + case AF_INET6: + ret = ip_set_get_ip6_port(skb, src, port, &proto); + default: + return false; + } + if (!ret) + return ret; + switch (proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + return true; + default: + return false; + } +} +EXPORT_SYMBOL_GPL(ip_set_get_ip_port); diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c new file mode 100644 index 0000000..23f8c81 --- /dev/null +++ b/net/netfilter/ipset/pfxlen.c @@ -0,0 +1,291 @@ +#include + +/* + * Prefixlen maps for fast conversions, by Jan Engelhardt. + */ + +#define E(a, b, c, d) \ + {.ip6 = { \ + __constant_htonl(a), __constant_htonl(b), \ + __constant_htonl(c), __constant_htonl(d), \ + } } + +/* + * This table works for both IPv4 and IPv6; + * just use prefixlen_netmask_map[prefixlength].ip. 
+ */ +const union nf_inet_addr ip_set_netmask_map[] = { + E(0x00000000, 0x00000000, 0x00000000, 0x00000000), + E(0x80000000, 0x00000000, 0x00000000, 0x00000000), + E(0xC0000000, 0x00000000, 0x00000000, 0x00000000), + E(0xE0000000, 0x00000000, 0x00000000, 0x00000000), + E(0xF0000000, 0x00000000, 0x00000000, 0x00000000), + E(0xF8000000, 0x00000000, 0x00000000, 0x00000000), + E(0xFC000000, 0x00000000, 0x00000000, 0x00000000), + E(0xFE000000, 0x00000000, 0x00000000, 0x00000000), + E(0xFF000000, 0x00000000, 0x00000000, 0x00000000), + E(0xFF800000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xF0000000, 
0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000), + 
E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 
0xFFFE0000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), +}; +EXPORT_SYMBOL_GPL(ip_set_netmask_map); + +#undef E +#define E(a, b, c, d) \ + {.ip6 = { (__force __be32) a, (__force __be32) b, \ + (__force __be32) c, (__force __be32) d, \ + } } + +/* + * This table works for both IPv4 and IPv6; + * just use prefixlen_hostmask_map[prefixlength].ip. 
+ */ +const union nf_inet_addr ip_set_hostmask_map[] = { + E(0x00000000, 0x00000000, 0x00000000, 0x00000000), + E(0x80000000, 0x00000000, 0x00000000, 0x00000000), + E(0xC0000000, 0x00000000, 0x00000000, 0x00000000), + E(0xE0000000, 0x00000000, 0x00000000, 0x00000000), + E(0xF0000000, 0x00000000, 0x00000000, 0x00000000), + E(0xF8000000, 0x00000000, 0x00000000, 0x00000000), + E(0xFC000000, 0x00000000, 0x00000000, 0x00000000), + E(0xFE000000, 0x00000000, 0x00000000, 0x00000000), + E(0xFF000000, 0x00000000, 0x00000000, 0x00000000), + E(0xFF800000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xF0000000, 
0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000), + 
E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 
0xFFFE0000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE), + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), +}; +EXPORT_SYMBOL_GPL(ip_set_hostmask_map); -- cgit v1.1 From 72205fc68bd13109576aa6c4c12c740962d28a6c Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:33:17 +0100 Subject: netfilter: ipset: bitmap:ip set type support The module implements the bitmap:ip set type in two flavours, without and with timeout support. In this kind of set one can store IPv4 addresses (or network addresses) from a given range. In order not to waste memory, the timeout version does not rely on the kernel timer for every element to be timed out but on garbage collection. All set types use this mechanism. 
Signed-off-by: Jozsef Kadlecsik
Signed-off-by: Patrick McHardy
---
 net/netfilter/ipset/Kconfig            |   9 +
 net/netfilter/ipset/Makefile           |   3 +
 net/netfilter/ipset/ip_set_bitmap_ip.c | 630 +++++++++++++++++++++++++++++++++
 3 files changed, 642 insertions(+)
 create mode 100644 net/netfilter/ipset/ip_set_bitmap_ip.c

(limited to 'net')

diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig
index 5ade156..b63a8ee 100644
--- a/net/netfilter/ipset/Kconfig
+++ b/net/netfilter/ipset/Kconfig
@@ -23,4 +23,13 @@ config IP_SET_MAX
 	  The value can be overriden by the 'max_sets' module
 	  parameter of the 'ip_set' module.
 
+config IP_SET_BITMAP_IP
+	tristate "bitmap:ip set support"
+	depends on IP_SET
+	help
+	  This option adds the bitmap:ip set type support, by which one
+	  can store IPv4 addresses (or network addresses) from a range.
+
+	  To compile it as a module, choose M here. If unsure, say N.
+
 endif # IP_SET
diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile
index 910cd42..ea1c85e 100644
--- a/net/netfilter/ipset/Makefile
+++ b/net/netfilter/ipset/Makefile
@@ -6,3 +6,6 @@ ip_set-y := ip_set_core.o ip_set_getport.o pfxlen.o
 
 # ipset core
 obj-$(CONFIG_IP_SET) += ip_set.o
+
+# bitmap types
+obj-$(CONFIG_IP_SET_BITMAP_IP) += ip_set_bitmap_ip.o
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
new file mode 100644
index 0000000..0474400
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -0,0 +1,630 @@
+/* Copyright (C) 2000-2002 Joakim Axelsson
+ *                         Patrick Schaaf
+ * Copyright (C) 2003-2011 Jozsef Kadlecsik
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the bitmap:ip type */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#define IP_SET_BITMAP_TIMEOUT
+#include
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik ");
+MODULE_DESCRIPTION("bitmap:ip type of IP sets");
+MODULE_ALIAS("ip_set_bitmap:ip");
+
+/* Type structure */
+struct bitmap_ip {
+	void *members;		/* the set members */
+	u32 first_ip;		/* host byte order, included in range */
+	u32 last_ip;		/* host byte order, included in range */
+	u32 elements;		/* number of max elements in the set */
+	u32 hosts;		/* number of hosts in a subnet */
+	size_t memsize;		/* members size */
+	u8 netmask;		/* subnet netmask */
+	u32 timeout;		/* timeout parameter */
+	struct timer_list gc;	/* garbage collection */
+};
+
+/* Base variant */
+
+/* Map a host-order address inside the set range to its element index */
+static inline u32
+ip_to_id(const struct bitmap_ip *m, u32 ip)
+{
+	return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip)/m->hosts;
+}
+
+/*
+ * Test whether element *value is set in the plain bitmap.  Every adt
+ * callback reads the index as u32, which is what bitmap_ip_kadt() and
+ * bitmap_ip_uadt() pass in; a u16 load would fetch the wrong half of the
+ * word on big-endian hosts.
+ */
+static int
+bitmap_ip_test(struct ip_set *set, void *value, u32 timeout)
+{
+	const struct bitmap_ip *map = set->data;
+	u32 id = *(const u32 *)value;
+
+	return !!test_bit(id, map->members);
+}
+
+/* Set the bit of element *value; -IPSET_ERR_EXIST if it was already set */
+static int
+bitmap_ip_add(struct ip_set *set, void *value, u32 timeout)
+{
+	struct bitmap_ip *map = set->data;
+	u32 id = *(const u32 *)value;
+
+	if (test_and_set_bit(id, map->members))
+		return -IPSET_ERR_EXIST;
+
+	return 0;
+}
+
+/* Clear the bit of element *value; -IPSET_ERR_EXIST if it was not set */
+static int
+bitmap_ip_del(struct ip_set *set, void *value, u32 timeout)
+{
+	struct bitmap_ip *map = set->data;
+	u32 id = *(const u32 *)value;
+
+	if (!test_and_clear_bit(id, map->members))
+		return -IPSET_ERR_EXIST;
+
+	return 0;
+}
+
+/*
+ * Dump the set members into skb, resuming at index cb->args[2].  Returns 0
+ * with cb->args[2] reset to 0 once the listing is complete.  When skb fills
+ * up it returns 0 with cb->args[2] left at the unsent index, or -EMSGSIZE
+ * if the failure hit the resume index itself.
+ */
+static int
+bitmap_ip_list(const struct ip_set *set,
+	       struct sk_buff *skb, struct netlink_callback *cb)
+{
+	const struct bitmap_ip *map = set->data;
+	struct nlattr *atd, *nested;
+	u32 id, first = cb->args[2];
+
+	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
+	if (!atd)
+		return -EMSGSIZE;
+	for (; cb->args[2] < map->elements; cb->args[2]++) {
+		id = cb->args[2];
+		if (!test_bit(id, map->members))
+			continue;
+		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+		if (!nested) {
+			if (id == first) {
+				nla_nest_cancel(skb, atd);
+				return -EMSGSIZE;
+			} else
+				goto nla_put_failure;
+		}
+		NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP,
+				htonl(map->first_ip + id * map->hosts));
+		ipset_nest_end(skb, nested);
+	}
+	ipset_nest_end(skb, atd);
+	/* Set listing finished */
+	cb->args[2] = 0;
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nested);
+	ipset_nest_end(skb, atd);
+	if (unlikely(id == first)) {
+		cb->args[2] = 0;
+		return -EMSGSIZE;
+	}
+	return 0;
+}
+
+/* Timeout variant */
+
+/* Run ip_set_timeout_test() on the timeout word of element *value */
+static int
+bitmap_ip_ttest(struct ip_set *set, void *value, u32 timeout)
+{
+	const struct bitmap_ip *map = set->data;
+	const unsigned long *members = map->members;
+	u32 id = *(const u32 *)value;
+
+	return ip_set_timeout_test(members[id]);
+}
+
+/* Store @timeout for element *value unless it still tests as present */
+static int
+bitmap_ip_tadd(struct ip_set *set, void *value, u32 timeout)
+{
+	struct bitmap_ip *map = set->data;
+	unsigned long *members = map->members;
+	u32 id = *(const u32 *)value;
+
+	if (ip_set_timeout_test(members[id]))
+		return -IPSET_ERR_EXIST;
+
+	members[id] = ip_set_timeout_set(timeout);
+
+	return 0;
+}
+
+/* Unset element *value; -IPSET_ERR_EXIST if it did not test as present */
+static int
+bitmap_ip_tdel(struct ip_set *set, void *value, u32 timeout)
+{
+	struct bitmap_ip *map = set->data;
+	unsigned long *members = map->members;
+	u32 id = *(const u32 *)value;
+	int ret = -IPSET_ERR_EXIST;
+
+	if (ip_set_timeout_test(members[id]))
+		ret = 0;
+
+	members[id] = IPSET_ELEM_UNSET;
+	return ret;
+}
+
+/* Same contract as bitmap_ip_list(), plus IPSET_ATTR_TIMEOUT per element */
+static int
+bitmap_ip_tlist(const struct ip_set *set,
+		struct sk_buff *skb, struct netlink_callback *cb)
+{
+	const struct bitmap_ip *map = set->data;
+	struct nlattr *adt, *nested;
+	u32 id, first = cb->args[2];
+	const unsigned long *members = map->members;
+
+	adt = ipset_nest_start(skb, IPSET_ATTR_ADT);
+	if (!adt)
+		return -EMSGSIZE;
+	for (; cb->args[2] < map->elements; cb->args[2]++) {
+		id = cb->args[2];
+		if (!ip_set_timeout_test(members[id]))
+			continue;
+		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+		if (!nested) {
+			if (id == first) {
+				nla_nest_cancel(skb, adt);
+				return -EMSGSIZE;
+			} else
+				goto nla_put_failure;
+		}
+		NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP,
+				htonl(map->first_ip + id * map->hosts));
+		NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT,
+			      htonl(ip_set_timeout_get(members[id])));
+		ipset_nest_end(skb, nested);
+	}
+	ipset_nest_end(skb, adt);
+
+	/* Set listing finished */
+	cb->args[2] = 0;
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nested);
+	ipset_nest_end(skb, adt);
+	if (unlikely(id == first)) {
+		cb->args[2] = 0;
+		return -EMSGSIZE;
+	}
+	return 0;
+}
+
+/* Packet path: apply @adt to the address ip4addr() extracts from skb */
+static int
+bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb,
+	       enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+{
+	struct bitmap_ip *map = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	u32 ip;
+
+	ip = ntohl(ip4addr(skb, flags & IPSET_DIM_ONE_SRC));
+	if (ip < map->first_ip || ip > map->last_ip)
+		return -IPSET_ERR_BITMAP_RANGE;
+
+	ip = ip_to_id(map, ip);
+
+	return adtfn(set, &ip, map->timeout);
+}
+
+/*
+ * Userspace (netlink) add/del/test of a single address, an IP..IP_TO range
+ * or an IP/CIDR block.  IPSET_TEST only ever looks at the address in IP.
+ */
+static int
+bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
+	       enum ipset_adt adt, u32 *lineno, u32 flags)
+{
+	struct bitmap_ip *map = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	u32 timeout = map->timeout;
+	u32 ip, ip_to, id;
+	int ret = 0;
+
+	if (unlikely(!tb[IPSET_ATTR_IP] ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+	if (ret)
+		return ret;
+
+	if (ip < map->first_ip || ip > map->last_ip)
+		return -IPSET_ERR_BITMAP_RANGE;
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		if (!with_timeout(map->timeout))
+			return -IPSET_ERR_TIMEOUT;
+		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+	}
+
+	if (adt == IPSET_TEST) {
+		id = ip_to_id(map, ip);
+		return adtfn(set, &id, timeout);
+	}
+
+	if (tb[IPSET_ATTR_IP_TO]) {
+		ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
+		if (ret)
+			return ret;
+		if (ip > ip_to) {
+			swap(ip, ip_to);
+			if (ip < map->first_ip)
+				return -IPSET_ERR_BITMAP_RANGE;
+		}
+	} else if (tb[IPSET_ATTR_CIDR]) {
+		u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+		if (cidr > 32)
+			return -IPSET_ERR_INVALID_CIDR;
+		ip &= ip_set_hostmask(cidr);
+		ip_to = ip | ~ip_set_hostmask(cidr);
+	} else
+		ip_to = ip;
+
+	/* CIDR masking may have moved ip below the start of the range */
+	if (ip < map->first_ip || ip_to > map->last_ip)
+		return -IPSET_ERR_BITMAP_RANGE;
+
+	for (; !before(ip_to, ip); ip += map->hosts) {
+		id = ip_to_id(map, ip);
+		ret = adtfn(set, &id, timeout);
+
+		if (ret && !ip_set_eexist(ret, flags))
+			return ret;
+		else
+			ret = 0;
+	}
+	return ret;
+}
+
+/* Stop the gc timer (timeout sets only) and free the member storage */
+static void
+bitmap_ip_destroy(struct ip_set *set)
+{
+	struct bitmap_ip *map = set->data;
+
+	if (with_timeout(map->timeout))
+		del_timer_sync(&map->gc);
+
+	ip_set_free(map->members);
+	kfree(map);
+
+	set->data = NULL;
+}
+
+/* Remove every element by zeroing the member storage */
+static void
+bitmap_ip_flush(struct ip_set *set)
+{
+	struct bitmap_ip *map = set->data;
+
+	memset(map->members, 0, map->memsize);
+}
+
+/* Emit the set header: range, netmask, references, memsize and timeout */
+static int
+bitmap_ip_head(struct ip_set *set, struct sk_buff *skb)
+{
+	const struct bitmap_ip *map = set->data;
+	struct nlattr *nested;
+
+	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+	if (!nested)
+		goto nla_put_failure;
+	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, htonl(map->first_ip));
+	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip));
+	if (map->netmask != 32)
+		NLA_PUT_U8(skb, IPSET_ATTR_NETMASK, map->netmask);
+	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
+		      htonl(atomic_read(&set->ref) - 1));
+	NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE,
+		      htonl(sizeof(*map) + map->memsize));
+	if (with_timeout(map->timeout))
+		NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout));
+	ipset_nest_end(skb, nested);
+
+	return 0;
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+/* Two sets are the same when range, netmask and timeout all agree */
+static bool
+bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b)
+{
+	const struct bitmap_ip *x = a->data;
+	const struct bitmap_ip *y = b->data;
+
+	return x->first_ip == y->first_ip &&
+	       x->last_ip == y->last_ip &&
+	       x->netmask == y->netmask &&
+	       x->timeout == y->timeout;
+}
+
+/* Method table of the plain (one bit per element) variant */
+static const struct ip_set_type_variant bitmap_ip = {
+	.kadt	= bitmap_ip_kadt,
+	.uadt	= bitmap_ip_uadt,
+	.adt	= {
+		[IPSET_ADD] = bitmap_ip_add,
+		[IPSET_DEL] = bitmap_ip_del,
+		[IPSET_TEST] = bitmap_ip_test,
+	},
+	.destroy = bitmap_ip_destroy,
+	.flush	= bitmap_ip_flush,
+	.head	= bitmap_ip_head,
+	.list	= bitmap_ip_list,
+	.same_set = bitmap_ip_same_set,
+};
+
+/* Method table of the timeout (one unsigned long per element) variant */
+static const struct ip_set_type_variant bitmap_tip = {
+	.kadt	= bitmap_ip_kadt,
+	.uadt	= bitmap_ip_uadt,
+	.adt	= {
+		[IPSET_ADD] = bitmap_ip_tadd,
+		[IPSET_DEL] = bitmap_ip_tdel,
+		[IPSET_TEST] = bitmap_ip_ttest,
+	},
+	.destroy = bitmap_ip_destroy,
+	.flush	= bitmap_ip_flush,
+	.head	= bitmap_ip_head,
+	.list	= bitmap_ip_tlist,
+	.same_set = bitmap_ip_same_set,
+};
+
+/* Timer callback: unset the expired elements, then re-arm the timer */
+static void
+bitmap_ip_gc(unsigned long ul_set)
+{
+	struct ip_set *set = (struct ip_set *) ul_set;
+	struct bitmap_ip *map = set->data;
+	unsigned long *table = map->members;
+	u32 id;
+
+	/* We run parallel with other readers (test element)
+	 * but adding/deleting new entries is locked out */
+	read_lock_bh(&set->lock);
+	for (id = 0; id < map->elements; id++)
+		if (ip_set_timeout_expired(table[id]))
+			table[id] = IPSET_ELEM_UNSET;
+	read_unlock_bh(&set->lock);
+
+	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+	add_timer(&map->gc);
+}
+
+/* Set up and start the garbage collector timer of a timeout set */
+static void
+bitmap_ip_gc_init(struct ip_set *set)
+{
+	struct bitmap_ip *map = set->data;
+
+	init_timer(&map->gc);
+	map->gc.data = (unsigned long) set;
+	map->gc.function = bitmap_ip_gc;
+	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
+	add_timer(&map->gc);
+}
+
+/* Create bitmap:ip type of sets */
+
+/* Allocate map->memsize bytes of member storage and fill in the map */
+static bool
+init_map_ip(struct ip_set *set, struct bitmap_ip *map,
+	    u32 first_ip, u32 last_ip,
+	    u32 elements, u32 hosts, u8 netmask)
+{
+	map->members = ip_set_alloc(map->memsize);
+	if (!map->members)
+		return false;
+	map->first_ip = first_ip;
+	map->last_ip = last_ip;
+	map->elements = elements;
+	map->hosts = hosts;
+	map->netmask = netmask;
+	map->timeout = IPSET_NO_TIMEOUT;
+
+	set->data = map;
+	set->family = AF_INET;
+
+	return true;
+}
+
+/*
+ * Create a set from IP plus IP_TO or CIDR, with optional NETMASK and
+ * TIMEOUT; the presence of TIMEOUT selects the timeout variant.
+ */
+static int
+bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
+{
+	struct bitmap_ip *map;
+	u32 first_ip, last_ip, hosts, elements;
+	u8 netmask = 32;
+	int ret;
+
+	if (unlikely(!tb[IPSET_ATTR_IP] ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+		return -IPSET_ERR_PROTOCOL;
+
+	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &first_ip);
+	if (ret)
+		return ret;
+
+	if (tb[IPSET_ATTR_IP_TO]) {
+		ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &last_ip);
+		if (ret)
+			return ret;
+		if (first_ip > last_ip) {
+			u32 tmp = first_ip;
+
+			first_ip = last_ip;
+			last_ip = tmp;
+		}
+	} else if (tb[IPSET_ATTR_CIDR]) {
+		u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+
+		if (cidr >= 32)
+			return -IPSET_ERR_INVALID_CIDR;
+		last_ip = first_ip | ~ip_set_hostmask(cidr);
+	} else
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_NETMASK]) {
+		netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);
+
+		if (netmask > 32)
+			return -IPSET_ERR_INVALID_NETMASK;
+
+		first_ip &= ip_set_hostmask(netmask);
+		last_ip |= ~ip_set_hostmask(netmask);
+	}
+
+	if (netmask == 32) {
+		/* Check the span first: span + 1 wraps to 0 for a full range */
+		if (last_ip - first_ip > IPSET_BITMAP_MAX_RANGE)
+			return -IPSET_ERR_BITMAP_RANGE_SIZE;
+		hosts = 1;
+		elements = last_ip - first_ip + 1;
+	} else {
+		u8 mask_bits;
+		u32 mask;
+
+		mask = range_to_mask(first_ip, last_ip, &mask_bits);
+
+		if ((!mask && (first_ip || last_ip != 0xFFFFFFFF)) ||
+		    netmask <= mask_bits)
+			return -IPSET_ERR_BITMAP_RANGE;
+
+		pr_debug("mask_bits %u, netmask %u\n", mask_bits, netmask);
+		hosts = 2U << (32 - netmask - 1);
+		elements = 2U << (netmask - mask_bits - 1);
+	}
+	if (elements > IPSET_BITMAP_MAX_RANGE + 1)
+		return -IPSET_ERR_BITMAP_RANGE_SIZE;
+
+	pr_debug("hosts %u, elements %u\n", hosts, elements);
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (!map)
+		return -ENOMEM;
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		map->memsize = elements * sizeof(unsigned long);
+
+		if (!init_map_ip(set, map, first_ip, last_ip,
+				 elements, hosts, netmask)) {
+			kfree(map);
+			return -ENOMEM;
+		}
+
+		map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+		set->variant = &bitmap_tip;
+
+		bitmap_ip_gc_init(set);
+	} else {
+		map->memsize = bitmap_bytes(0, elements - 1);
+
+		if (!init_map_ip(set, map, first_ip, last_ip,
+				 elements, hosts, netmask)) {
+			kfree(map);
+			return -ENOMEM;
+		}
+
+		set->variant = &bitmap_ip;
+	}
+	return 0;
+}
+
+/* Type descriptor handed to ip_set_type_register() at module init */
+static struct ip_set_type bitmap_ip_type __read_mostly = {
+	.name		= "bitmap:ip",
+	.protocol	= IPSET_PROTOCOL,
+	.features	= IPSET_TYPE_IP,
+	.dimension	= IPSET_DIM_ONE,
+	.family		= AF_INET,
+	.revision	= 0,
+	.create		= bitmap_ip_create,
+	.create_policy	= {
+		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
+		[IPSET_ATTR_IP_TO]	= { .type = NLA_NESTED },
+		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },
+		[IPSET_ATTR_NETMASK]	= { .type = NLA_U8 },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+	},
+	.adt_policy	= {
+		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
+		[IPSET_ATTR_IP_TO]	= { .type = NLA_NESTED },
+		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },
+		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
+	},
+	.me		= THIS_MODULE,
+};
+
+static int __init
+bitmap_ip_init(void)
+{
+	return ip_set_type_register(&bitmap_ip_type);
+}
+
+static void __exit
+bitmap_ip_fini(void)
+{
+	ip_set_type_unregister(&bitmap_ip_type);
+}
+
+module_init(bitmap_ip_init);
+module_exit(bitmap_ip_fini);
--
cgit v1.1

From de76021a1bb35e3560afccf741d1119a872aea49 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik
Date: Tue, 1 Feb 2011 15:35:12 +0100
Subject: netfilter: ipset: bitmap:ip,mac type support

The module implements the bitmap:ip,mac set type in two flavours,
without and with timeout support. In this kind of set one can store
IPv4 address and (source) MAC address pairs.
The type supports elements added without the MAC part filled out: when
the first matching from kernel happens, the MAC part is automatically
filled out. The timing out of the elements starts when an element is
complete in the IP,MAC pair.

Signed-off-by: Jozsef Kadlecsik
Signed-off-by: Patrick McHardy
---
 net/netfilter/ipset/Kconfig               |   9 +
 net/netfilter/ipset/Makefile              |   1 +
 net/netfilter/ipset/ip_set_bitmap_ipmac.c | 685 ++++++++++++++++++++++++++++++
 3 files changed, 695 insertions(+)
 create mode 100644 net/netfilter/ipset/ip_set_bitmap_ipmac.c

(limited to 'net')

diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig
index b63a8ee..f18654c 100644
--- a/net/netfilter/ipset/Kconfig
+++ b/net/netfilter/ipset/Kconfig
@@ -32,4 +32,13 @@ config IP_SET_BITMAP_IP
 
 	  To compile it as a module, choose M here. If unsure, say N.
 
+config IP_SET_BITMAP_IPMAC
+	tristate "bitmap:ip,mac set support"
+	depends on IP_SET
+	help
+	  This option adds the bitmap:ip,mac set type support, by which one
+	  can store IPv4 address and (source) MAC address pairs from a range.
+
+	  To compile it as a module, choose M here. If unsure, say N.
+
 endif # IP_SET
diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile
index ea1c85e..f7a099f 100644
--- a/net/netfilter/ipset/Makefile
+++ b/net/netfilter/ipset/Makefile
@@ -9,3 +9,4 @@ obj-$(CONFIG_IP_SET) += ip_set.o
 
 # bitmap types
 obj-$(CONFIG_IP_SET_BITMAP_IP) += ip_set_bitmap_ip.o
+obj-$(CONFIG_IP_SET_BITMAP_IPMAC) += ip_set_bitmap_ipmac.o
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
new file mode 100644
index 0000000..d826332
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -0,0 +1,685 @@
+/* Copyright (C) 2000-2002 Joakim Axelsson
+ *                         Patrick Schaaf
+ *			   Martin Josefsson
+ * Copyright (C) 2003-2011 Jozsef Kadlecsik
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the bitmap:ip,mac type */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik ");
+MODULE_DESCRIPTION("bitmap:ip,mac type of IP sets");
+MODULE_ALIAS("ip_set_bitmap:ip,mac");
+
+enum {
+	MAC_EMPTY,		/* element is not set */
+	MAC_FILLED,		/* element is set with MAC */
+	MAC_UNSET,		/* element is set, without MAC */
+};
+
+/* Type structure */
+struct bitmap_ipmac {
+	void *members;		/* the set members */
+	u32 first_ip;		/* host byte order, included in range */
+	u32 last_ip;		/* host byte order, included in range */
+	u32 timeout;		/* timeout value */
+	struct timer_list gc;	/* garbage collector */
+	size_t dsize;		/* size of element */
+};
+
+/* ADT structure for generic function args */
+struct ipmac {
+	u32 id;			/* id in array */
+	unsigned char *ether;	/* ethernet address */
+};
+
+/* Member element without and with timeout */
+
+struct ipmac_elem {
+	unsigned char ether[ETH_ALEN];
+	unsigned char match;
+} __attribute__ ((aligned));
+
+struct ipmac_telem {
+	unsigned char ether[ETH_ALEN];
+	unsigned char match;
+	unsigned long timeout;
+} __attribute__ ((aligned));
+
+/* Address of element @id in the member array (map->dsize bytes each) */
+static inline void *
+bitmap_ipmac_elem(const struct bitmap_ipmac *map, u32 id)
+{
+	return (void *)((char *)map->members + id * map->dsize);
+}
+
+/* Run ip_set_timeout_test() on the timeout stored in element @id */
+static inline bool
+bitmap_timeout(const struct bitmap_ipmac *map, u32 id)
+{
+	const struct ipmac_telem *elem = bitmap_ipmac_elem(map, id);
+
+	return ip_set_timeout_test(elem->timeout);
+}
+
+/* Run ip_set_timeout_expired() on the timeout stored in element @id */
+static inline bool
+bitmap_expired(const struct bitmap_ipmac *map, u32 id)
+{
+	const struct ipmac_telem *elem = bitmap_ipmac_elem(map, id);
+
+	return ip_set_timeout_expired(elem->timeout);
+}
+
+/* An element exists if it awaits its MAC, or has one and is not expired */
+static inline int
+bitmap_ipmac_exist(const struct ipmac_telem *elem)
+{
+	return elem->match == MAC_UNSET ||
+	       (elem->match == MAC_FILLED &&
+		!ip_set_timeout_expired(elem->timeout));
+}
+
+/* Base variant */
+
+/*
+ * Test element data->id: non-zero on match (any MAC matches when
+ * data->ether is NULL), 0 otherwise, -EAGAIN while the MAC is unknown.
+ */
+static int
+bitmap_ipmac_test(struct ip_set *set, void *value, u32 timeout)
+{
+	const struct bitmap_ipmac *map = set->data;
+	const struct ipmac *data = value;
+	const struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id);
+
+	switch (elem->match) {
+	case MAC_UNSET:
+		/* Trigger kernel to fill out the ethernet address */
+		return -EAGAIN;
+	case MAC_FILLED:
+		return data->ether == NULL ||
+		       compare_ether_addr(data->ether, elem->ether) == 0;
+	}
+	return 0;
+}
+
+/* Add element data->id, or fill in the MAC of one added without it */
+static int
+bitmap_ipmac_add(struct ip_set *set, void *value, u32 timeout)
+{
+	struct bitmap_ipmac *map = set->data;
+	const struct ipmac *data = value;
+	struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id);
+
+	switch (elem->match) {
+	case MAC_UNSET:
+		if (!data->ether)
+			/* Already added without ethernet address */
+			return -IPSET_ERR_EXIST;
+		/* Fill the MAC address */
+		memcpy(elem->ether, data->ether, ETH_ALEN);
+		elem->match = MAC_FILLED;
+		break;
+	case MAC_FILLED:
+		return -IPSET_ERR_EXIST;
+	case MAC_EMPTY:
+		if (data->ether) {
+			memcpy(elem->ether, data->ether, ETH_ALEN);
+			elem->match = MAC_FILLED;
+		} else
+			elem->match = MAC_UNSET;
+	}
+
+	return 0;
+}
+
+/* Mark element data->id empty; -IPSET_ERR_EXIST if it already was */
+static int
+bitmap_ipmac_del(struct ip_set *set, void *value, u32 timeout)
+{
+	struct bitmap_ipmac *map = set->data;
+	const struct ipmac *data = value;
+	struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id);
+
+	if (elem->match == MAC_EMPTY)
+		return -IPSET_ERR_EXIST;
+
+	elem->match = MAC_EMPTY;
+
+	return 0;
+}
+
+/* Dump the non-empty elements into skb, resuming at index cb->args[2] */
+static int
+bitmap_ipmac_list(const struct ip_set *set,
+		  struct sk_buff *skb, struct netlink_callback *cb)
+{
+	const struct bitmap_ipmac *map = set->data;
+	const struct ipmac_elem *elem;
+	struct nlattr *atd, *nested;
+	u32 id, first = cb->args[2];
+	u32 last = map->last_ip - map->first_ip;
+
+	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
+	if (!atd)
+		return -EMSGSIZE;
+	for (; cb->args[2] <= last; cb->args[2]++) {
+		id = cb->args[2];
+		elem = bitmap_ipmac_elem(map, id);
+		if (elem->match == MAC_EMPTY)
+			continue;
+		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+		if (!nested) {
+			if (id == first) {
+				nla_nest_cancel(skb, atd);
+				return -EMSGSIZE;
+			} else
+				goto nla_put_failure;
+		}
+		NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP,
+				htonl(map->first_ip + id));
+		if (elem->match == MAC_FILLED)
+			NLA_PUT(skb, IPSET_ATTR_ETHER, ETH_ALEN,
+				elem->ether);
+		ipset_nest_end(skb, nested);
+	}
+	ipset_nest_end(skb, atd);
+	/* Set listing finished */
+	cb->args[2] = 0;
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nested);
+	ipset_nest_end(skb, atd);
+	if (unlikely(id == first)) {
+		cb->args[2] = 0;
+		return -EMSGSIZE;
+	}
+	return 0;
+}
+
+/* Timeout variant */
+
+/* As bitmap_ipmac_test(), but a MAC_FILLED element must not be expired */
+static int
+bitmap_ipmac_ttest(struct ip_set *set, void *value, u32 timeout)
+{
+	const struct bitmap_ipmac *map = set->data;
+	const struct ipmac *data = value;
+	const struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id);
+
+	switch (elem->match) {
+	case MAC_UNSET:
+		/* Trigger kernel to fill out the ethernet address */
+		return -EAGAIN;
+	case MAC_FILLED:
+		return (data->ether == NULL ||
+			compare_ether_addr(data->ether, elem->ether) == 0) &&
+		       !bitmap_expired(map, data->id);
+	}
+	return 0;
+}
+
+/* Timeout-aware add; an expired MAC_FILLED element may be overwritten */
+static int
+bitmap_ipmac_tadd(struct ip_set *set, void *value, u32 timeout)
+{
+	struct bitmap_ipmac *map = set->data;
+	const struct ipmac *data = value;
+	struct ipmac_telem *elem = bitmap_ipmac_elem(map, data->id);
+
+	switch (elem->match) {
+	case MAC_UNSET:
+		if (!data->ether)
+			/* Already added without ethernet address */
+			return -IPSET_ERR_EXIST;
+		/* Fill the MAC address and activate the timer */
+		memcpy(elem->ether, data->ether, ETH_ALEN);
+		elem->match = MAC_FILLED;
+		if (timeout == map->timeout)
+			/* Timeout was not specified, get stored one */
+			timeout = elem->timeout;
+		elem->timeout = ip_set_timeout_set(timeout);
+		break;
+	case MAC_FILLED:
+		if (!bitmap_expired(map, data->id))
+			return -IPSET_ERR_EXIST;
+		/* Fall through */
+	case MAC_EMPTY:
+		if (data->ether) {
+			memcpy(elem->ether, data->ether, ETH_ALEN);
+			elem->match = MAC_FILLED;
+		} else
+			elem->match = MAC_UNSET;
+		/* If MAC is unset yet, we store plain timeout value
+		 * because the timer is not activated yet
+		 * and we can reuse it later when MAC is filled out,
+		 * possibly by the kernel */
+		elem->timeout = data->ether ? ip_set_timeout_set(timeout)
+					    : timeout;
+		break;
+	}
+
+	return 0;
+}
+
+/* Mark element data->id empty; -IPSET_ERR_EXIST if empty or expired */
+static int
+bitmap_ipmac_tdel(struct ip_set *set, void *value, u32 timeout)
+{
+	struct bitmap_ipmac *map = set->data;
+	const struct ipmac *data = value;
+	struct ipmac_telem *elem = bitmap_ipmac_elem(map, data->id);
+
+	if (elem->match == MAC_EMPTY || bitmap_expired(map, data->id))
+		return -IPSET_ERR_EXIST;
+
+	elem->match = MAC_EMPTY;
+
+	return 0;
+}
+
+/*
+ * Dump the existing elements with their timeouts, resuming at index
+ * cb->args[2].  As in bitmap_ipmac_list(), a full skb is only reported as
+ * -EMSGSIZE when the failure hit the resume index itself.
+ */
+static int
+bitmap_ipmac_tlist(const struct ip_set *set,
+		   struct sk_buff *skb, struct netlink_callback *cb)
+{
+	const struct bitmap_ipmac *map = set->data;
+	const struct ipmac_telem *elem;
+	struct nlattr *atd, *nested;
+	u32 id, first = cb->args[2];
+	u32 timeout, last = map->last_ip - map->first_ip;
+
+	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
+	if (!atd)
+		return -EMSGSIZE;
+	for (; cb->args[2] <= last; cb->args[2]++) {
+		id = cb->args[2];
+		elem = bitmap_ipmac_elem(map, id);
+		if (!bitmap_ipmac_exist(elem))
+			continue;
+		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+		if (!nested) {
+			if (id == first) {
+				nla_nest_cancel(skb, atd);
+				return -EMSGSIZE;
+			} else
+				goto nla_put_failure;
+		}
+		NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP,
+				htonl(map->first_ip + id));
+		if (elem->match == MAC_FILLED)
+			NLA_PUT(skb, IPSET_ATTR_ETHER, ETH_ALEN,
+				elem->ether);
+		timeout = elem->match == MAC_UNSET ? elem->timeout
+				: ip_set_timeout_get(elem->timeout);
+		NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(timeout));
+		ipset_nest_end(skb, nested);
+	}
+	ipset_nest_end(skb, atd);
+	/* Set listing finished */
+	cb->args[2] = 0;
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nested);
+	ipset_nest_end(skb, atd);
+	if (unlikely(id == first)) {
+		cb->args[2] = 0;
+		return -EMSGSIZE;
+	}
+	return 0;
+}
+
+/* Packet path: apply @adt to the address and source MAC taken from skb */
+static int
+bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
+		  enum ipset_adt adt, u8 pf, u8 dim, u8 flags)
+{
+	struct bitmap_ipmac *map = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct ipmac data;
+
+	data.id = ntohl(ip4addr(skb, flags & IPSET_DIM_ONE_SRC));
+	if (data.id < map->first_ip || data.id > map->last_ip)
+		return -IPSET_ERR_BITMAP_RANGE;
+
+	/* Backward compatibility: we don't check the second flag */
+	if (skb_mac_header(skb) < skb->head ||
+	    (skb_mac_header(skb) + ETH_HLEN) > skb->data)
+		return -EINVAL;
+
+	data.id -= map->first_ip;
+	data.ether = eth_hdr(skb)->h_source;
+
+	return adtfn(set, &data, map->timeout);
+}
+
+/* Userspace (netlink) add/del/test of one address with an optional MAC */
+static int
+bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
+		  enum ipset_adt adt, u32 *lineno, u32 flags)
+{
+	const struct bitmap_ipmac *map = set->data;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct ipmac data;
+	u32 timeout = map->timeout;
+	int ret = 0;
+
+	if (unlikely(!tb[IPSET_ATTR_IP] ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+		return -IPSET_ERR_PROTOCOL;
+
+	if (tb[IPSET_ATTR_LINENO])
+		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &data.id);
+	if (ret)
+		return ret;
+
+	if (data.id < map->first_ip || data.id > map->last_ip)
+		return -IPSET_ERR_BITMAP_RANGE;
+
+	if (tb[IPSET_ATTR_ETHER])
+		data.ether = nla_data(tb[IPSET_ATTR_ETHER]);
+	else
+		data.ether = NULL;
+
+	if (tb[IPSET_ATTR_TIMEOUT]) {
+		if (!with_timeout(map->timeout))
+			return -IPSET_ERR_TIMEOUT;
+		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+	}
+
+	data.id -= map->first_ip;
+
+	ret = adtfn(set, &data, timeout);
+
+	return ip_set_eexist(ret, flags) ? 0 : ret;
+}
+
+/* Stop the gc timer (timeout sets only) and free the member storage */
+static void
+bitmap_ipmac_destroy(struct ip_set *set)
+{
+	struct bitmap_ipmac *map = set->data;
+
+	if (with_timeout(map->timeout))
+		del_timer_sync(&map->gc);
+
+	ip_set_free(map->members);
+	kfree(map);
+
+	set->data = NULL;
+}
+
+/* Remove every element by zeroing the whole member array */
+static void
+bitmap_ipmac_flush(struct ip_set *set)
+{
+	struct bitmap_ipmac *map = set->data;
+
+	memset(map->members, 0,
+	       (map->last_ip - map->first_ip + 1) * map->dsize);
+}
+
+/* Emit the set header: range, references, memsize and timeout */
+static int
+bitmap_ipmac_head(struct ip_set *set, struct sk_buff *skb)
+{
+	const struct bitmap_ipmac *map = set->data;
+	struct nlattr *nested;
+
+	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
+	if (!nested)
+		goto nla_put_failure;
+	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, htonl(map->first_ip));
+	NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip));
+	NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES,
+		      htonl(atomic_read(&set->ref) - 1));
+	NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE,
+		      htonl(sizeof(*map)
+			    + (map->last_ip - map->first_ip + 1) * map->dsize));
+	if (with_timeout(map->timeout))
+		NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout));
+	ipset_nest_end(skb, nested);
+
+	return 0;
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+/* Two sets are the same when range and timeout agree */
+static bool
+bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b)
+{
+	const struct bitmap_ipmac *x = a->data;
+	const struct bitmap_ipmac *y = b->data;
+
+	return x->first_ip == y->first_ip &&
+	       x->last_ip == y->last_ip &&
+	       x->timeout == y->timeout;
+}
+
+/* Method table of the variant without timeout */
+static const struct ip_set_type_variant bitmap_ipmac = {
+	.kadt	= bitmap_ipmac_kadt,
+	.uadt	= bitmap_ipmac_uadt,
+	.adt	= {
+		[IPSET_ADD] = bitmap_ipmac_add,
+		[IPSET_DEL] = bitmap_ipmac_del,
+		[IPSET_TEST] = bitmap_ipmac_test,
+	},
+	.destroy = bitmap_ipmac_destroy,
+	.flush	= bitmap_ipmac_flush,
+	.head	= bitmap_ipmac_head,
+	.list	= bitmap_ipmac_list,
+	.same_set = bitmap_ipmac_same_set,
+};
+
+static const struct ip_set_type_variant bitmap_tipmac = {
+	.kadt	= bitmap_ipmac_kadt,
+
.uadt = bitmap_ipmac_uadt, + .adt = { + [IPSET_ADD] = bitmap_ipmac_tadd, + [IPSET_DEL] = bitmap_ipmac_tdel, + [IPSET_TEST] = bitmap_ipmac_ttest, + }, + .destroy = bitmap_ipmac_destroy, + .flush = bitmap_ipmac_flush, + .head = bitmap_ipmac_head, + .list = bitmap_ipmac_tlist, + .same_set = bitmap_ipmac_same_set, +}; + +static void +bitmap_ipmac_gc(unsigned long ul_set) +{ + struct ip_set *set = (struct ip_set *) ul_set; + struct bitmap_ipmac *map = set->data; + struct ipmac_telem *elem; + u32 id, last = map->last_ip - map->first_ip; + + /* We run parallel with other readers (test element) + * but adding/deleting new entries is locked out */ + read_lock_bh(&set->lock); + for (id = 0; id <= last; id++) { + elem = bitmap_ipmac_elem(map, id); + if (elem->match == MAC_FILLED && + ip_set_timeout_expired(elem->timeout)) + elem->match = MAC_EMPTY; + } + read_unlock_bh(&set->lock); + + map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + add_timer(&map->gc); +} + +static void +bitmap_ipmac_gc_init(struct ip_set *set) +{ + struct bitmap_ipmac *map = set->data; + + init_timer(&map->gc); + map->gc.data = (unsigned long) set; + map->gc.function = bitmap_ipmac_gc; + map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + add_timer(&map->gc); +} + +/* Create bitmap:ip,mac type of sets */ + +static bool +init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, + u32 first_ip, u32 last_ip) +{ + map->members = ip_set_alloc((last_ip - first_ip + 1) * map->dsize); + if (!map->members) + return false; + map->first_ip = first_ip; + map->last_ip = last_ip; + map->timeout = IPSET_NO_TIMEOUT; + + set->data = map; + set->family = AF_INET; + + return true; +} + +static int +bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], + u32 flags) +{ + u32 first_ip, last_ip, elements; + struct bitmap_ipmac *map; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + ret = 
ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &first_ip); + if (ret) + return ret; + + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &last_ip); + if (ret) + return ret; + if (first_ip > last_ip) { + u32 tmp = first_ip; + + first_ip = last_ip; + last_ip = tmp; + } + } else if (tb[IPSET_ATTR_CIDR]) { + u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (cidr >= 32) + return -IPSET_ERR_INVALID_CIDR; + last_ip = first_ip | ~ip_set_hostmask(cidr); + } else + return -IPSET_ERR_PROTOCOL; + + elements = last_ip - first_ip + 1; + + if (elements > IPSET_BITMAP_MAX_RANGE + 1) + return -IPSET_ERR_BITMAP_RANGE_SIZE; + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) + return -ENOMEM; + + if (tb[IPSET_ATTR_TIMEOUT]) { + map->dsize = sizeof(struct ipmac_telem); + + if (!init_map_ipmac(set, map, first_ip, last_ip)) { + kfree(map); + return -ENOMEM; + } + + map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + + set->variant = &bitmap_tipmac; + + bitmap_ipmac_gc_init(set); + } else { + map->dsize = sizeof(struct ipmac_elem); + + if (!init_map_ipmac(set, map, first_ip, last_ip)) { + kfree(map); + return -ENOMEM; + } + set->variant = &bitmap_ipmac; + + } + return 0; +} + +static struct ip_set_type bitmap_ipmac_type = { + .name = "bitmap:ip,mac", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_MAC, + .dimension = IPSET_DIM_TWO, + .family = AF_INET, + .revision = 0, + .create = bitmap_ipmac_create, + .create_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_ETHER] = { .type = NLA_BINARY, .len = ETH_ALEN }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + }, + .me = THIS_MODULE, +}; + +static int __init +bitmap_ipmac_init(void) +{ + return 
ip_set_type_register(&bitmap_ipmac_type); +} + +static void __exit +bitmap_ipmac_fini(void) +{ + ip_set_type_unregister(&bitmap_ipmac_type); +} + +module_init(bitmap_ipmac_init); +module_exit(bitmap_ipmac_fini); -- cgit v1.1 From 543261907dc3c4e90845acfcd602ebdbfdfcb4f0 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:37:04 +0100 Subject: netfilter: ipset; bitmap:port set type support The module implements the bitmap:port type in two flavours, without and with timeout support to store TCP/UDP ports from a range. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/ipset/Kconfig | 9 + net/netfilter/ipset/Makefile | 1 + net/netfilter/ipset/ip_set_bitmap_port.c | 520 +++++++++++++++++++++++++++++++ 3 files changed, 530 insertions(+) create mode 100644 net/netfilter/ipset/ip_set_bitmap_port.c (limited to 'net') diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index f18654c..f401e91 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -41,4 +41,13 @@ config IP_SET_BITMAP_IPMAC To compile it as a module, choose M here. If unsure, say N. +config IP_SET_BITMAP_PORT + tristate "bitmap:port set support" + depends on IP_SET + help + This option adds the bitmap:port set type support, by which one + can store TCP/UDP port numbers from a range. + + To compile it as a module, choose M here. If unsure, say N. 
+ endif # IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index f7a099f..40866e2 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -10,3 +10,4 @@ obj-$(CONFIG_IP_SET) += ip_set.o # bitmap types obj-$(CONFIG_IP_SET_BITMAP_IP) += ip_set_bitmap_ip.o obj-$(CONFIG_IP_SET_BITMAP_IPMAC) += ip_set_bitmap_ipmac.o +obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c new file mode 100644 index 0000000..92074bb --- /dev/null +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -0,0 +1,520 @@ +/* Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the bitmap:port type */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#define IP_SET_BITMAP_TIMEOUT +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("bitmap:port type of IP sets"); +MODULE_ALIAS("ip_set_bitmap:port"); + +/* Type structure */ +struct bitmap_port { + void *members; /* the set members */ + u16 first_port; /* host byte order, included in range */ + u16 last_port; /* host byte order, included in range */ + size_t memsize; /* members size */ + u32 timeout; /* timeout parameter */ + struct timer_list gc; /* garbage collection */ +}; + +/* Base variant */ + +static int +bitmap_port_test(struct ip_set *set, void *value, u32 timeout) +{ + const struct bitmap_port *map = set->data; + u16 id = *(u16 *)value; + + return !!test_bit(id, map->members); +} + +static int +bitmap_port_add(struct ip_set *set, void *value, u32 timeout) +{ + struct bitmap_port *map = set->data; + u16 id = 
*(u16 *)value; + + if (test_and_set_bit(id, map->members)) + return -IPSET_ERR_EXIST; + + return 0; +} + +static int +bitmap_port_del(struct ip_set *set, void *value, u32 timeout) +{ + struct bitmap_port *map = set->data; + u16 id = *(u16 *)value; + + if (!test_and_clear_bit(id, map->members)) + return -IPSET_ERR_EXIST; + + return 0; +} + +static int +bitmap_port_list(const struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct bitmap_port *map = set->data; + struct nlattr *atd, *nested; + u16 id, first = cb->args[2]; + u16 last = map->last_port - map->first_port; + + atd = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!atd) + return -EMSGSIZE; + for (; cb->args[2] <= last; cb->args[2]++) { + id = cb->args[2]; + if (!test_bit(id, map->members)) + continue; + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (id == first) { + nla_nest_cancel(skb, atd); + return -EMSGSIZE; + } else + goto nla_put_failure; + } + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, + htons(map->first_port + id)); + ipset_nest_end(skb, nested); + } + ipset_nest_end(skb, atd); + /* Set listing finished */ + cb->args[2] = 0; + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nested); + ipset_nest_end(skb, atd); + if (unlikely(id == first)) { + cb->args[2] = 0; + return -EMSGSIZE; + } + return 0; +} + +/* Timeout variant */ + +static int +bitmap_port_ttest(struct ip_set *set, void *value, u32 timeout) +{ + const struct bitmap_port *map = set->data; + const unsigned long *members = map->members; + u16 id = *(u16 *)value; + + return ip_set_timeout_test(members[id]); +} + +static int +bitmap_port_tadd(struct ip_set *set, void *value, u32 timeout) +{ + struct bitmap_port *map = set->data; + unsigned long *members = map->members; + u16 id = *(u16 *)value; + + if (ip_set_timeout_test(members[id])) + return -IPSET_ERR_EXIST; + + members[id] = ip_set_timeout_set(timeout); + + return 0; +} + +static int +bitmap_port_tdel(struct ip_set *set, void *value, 
u32 timeout) +{ + struct bitmap_port *map = set->data; + unsigned long *members = map->members; + u16 id = *(u16 *)value; + int ret = -IPSET_ERR_EXIST; + + if (ip_set_timeout_test(members[id])) + ret = 0; + + members[id] = IPSET_ELEM_UNSET; + return ret; +} + +static int +bitmap_port_tlist(const struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct bitmap_port *map = set->data; + struct nlattr *adt, *nested; + u16 id, first = cb->args[2]; + u16 last = map->last_port - map->first_port; + const unsigned long *members = map->members; + + adt = ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!adt) + return -EMSGSIZE; + for (; cb->args[2] <= last; cb->args[2]++) { + id = cb->args[2]; + if (!ip_set_timeout_test(members[id])) + continue; + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (id == first) { + nla_nest_cancel(skb, adt); + return -EMSGSIZE; + } else + goto nla_put_failure; + } + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, + htons(map->first_port + id)); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(members[id]))); + ipset_nest_end(skb, nested); + } + ipset_nest_end(skb, adt); + + /* Set listing finished */ + cb->args[2] = 0; + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nested); + ipset_nest_end(skb, adt); + if (unlikely(id == first)) { + cb->args[2] = 0; + return -EMSGSIZE; + } + return 0; +} + +static int +bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + struct bitmap_port *map = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + __be16 __port; + u16 port = 0; + + if (!ip_set_get_ip_port(skb, pf, flags & IPSET_DIM_ONE_SRC, &__port)) + return -EINVAL; + + port = ntohs(__port); + + if (port < map->first_port || port > map->last_port) + return -IPSET_ERR_BITMAP_RANGE; + + port -= map->first_port; + + return adtfn(set, &port, map->timeout); +} + +static int +bitmap_port_uadt(struct ip_set *set, struct nlattr 
*tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + struct bitmap_port *map = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + u32 timeout = map->timeout; + u32 port; /* wraparound */ + u16 id, port_to; + int ret = 0; + + if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + port = ip_set_get_h16(tb[IPSET_ATTR_PORT]); + if (port < map->first_port || port > map->last_port) + return -IPSET_ERR_BITMAP_RANGE; + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(map->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + if (adt == IPSET_TEST) { + id = port - map->first_port; + return adtfn(set, &id, timeout); + } + + if (tb[IPSET_ATTR_PORT_TO]) { + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) { + swap(port, port_to); + if (port < map->first_port) + return -IPSET_ERR_BITMAP_RANGE; + } + } else + port_to = port; + + if (port_to > map->last_port) + return -IPSET_ERR_BITMAP_RANGE; + + for (; port <= port_to; port++) { + id = port - map->first_port; + ret = adtfn(set, &id, timeout); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +static void +bitmap_port_destroy(struct ip_set *set) +{ + struct bitmap_port *map = set->data; + + if (with_timeout(map->timeout)) + del_timer_sync(&map->gc); + + ip_set_free(map->members); + kfree(map); + + set->data = NULL; +} + +static void +bitmap_port_flush(struct ip_set *set) +{ + struct bitmap_port *map = set->data; + + memset(map->members, 0, map->memsize); +} + +static int +bitmap_port_head(struct ip_set *set, struct sk_buff *skb) +{ + const struct bitmap_port *map = set->data; + struct nlattr *nested; + + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) + goto 
nla_put_failure; + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, htons(map->first_port)); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, + htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, + htonl(sizeof(*map) + map->memsize)); + if (with_timeout(map->timeout)) + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)); + ipset_nest_end(skb, nested); + + return 0; +nla_put_failure: + return -EMSGSIZE; +} + +static bool +bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b) +{ + const struct bitmap_port *x = a->data; + const struct bitmap_port *y = b->data; + + return x->first_port == y->first_port && + x->last_port == y->last_port && + x->timeout == y->timeout; +} + +static const struct ip_set_type_variant bitmap_port = { + .kadt = bitmap_port_kadt, + .uadt = bitmap_port_uadt, + .adt = { + [IPSET_ADD] = bitmap_port_add, + [IPSET_DEL] = bitmap_port_del, + [IPSET_TEST] = bitmap_port_test, + }, + .destroy = bitmap_port_destroy, + .flush = bitmap_port_flush, + .head = bitmap_port_head, + .list = bitmap_port_list, + .same_set = bitmap_port_same_set, +}; + +static const struct ip_set_type_variant bitmap_tport = { + .kadt = bitmap_port_kadt, + .uadt = bitmap_port_uadt, + .adt = { + [IPSET_ADD] = bitmap_port_tadd, + [IPSET_DEL] = bitmap_port_tdel, + [IPSET_TEST] = bitmap_port_ttest, + }, + .destroy = bitmap_port_destroy, + .flush = bitmap_port_flush, + .head = bitmap_port_head, + .list = bitmap_port_tlist, + .same_set = bitmap_port_same_set, +}; + +static void +bitmap_port_gc(unsigned long ul_set) +{ + struct ip_set *set = (struct ip_set *) ul_set; + struct bitmap_port *map = set->data; + unsigned long *table = map->members; + u32 id; /* wraparound */ + u16 last = map->last_port - map->first_port; + + /* We run parallel with other readers (test element) + * but adding/deleting new entries is locked out */ + read_lock_bh(&set->lock); + for (id = 0; id <= last; id++) + if 
(ip_set_timeout_expired(table[id])) + table[id] = IPSET_ELEM_UNSET; + read_unlock_bh(&set->lock); + + map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + add_timer(&map->gc); +} + +static void +bitmap_port_gc_init(struct ip_set *set) +{ + struct bitmap_port *map = set->data; + + init_timer(&map->gc); + map->gc.data = (unsigned long) set; + map->gc.function = bitmap_port_gc; + map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + add_timer(&map->gc); +} + +/* Create bitmap:ip type of sets */ + +static bool +init_map_port(struct ip_set *set, struct bitmap_port *map, + u16 first_port, u16 last_port) +{ + map->members = ip_set_alloc(map->memsize); + if (!map->members) + return false; + map->first_port = first_port; + map->last_port = last_port; + map->timeout = IPSET_NO_TIMEOUT; + + set->data = map; + set->family = AF_UNSPEC; + + return true; +} + +static int +bitmap_port_create(struct ip_set *set, struct nlattr *tb[], + u32 flags) +{ + struct bitmap_port *map; + u16 first_port, last_port; + + if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + first_port = ip_set_get_h16(tb[IPSET_ATTR_PORT]); + last_port = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (first_port > last_port) { + u16 tmp = first_port; + + first_port = last_port; + last_port = tmp; + } + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) + return -ENOMEM; + + if (tb[IPSET_ATTR_TIMEOUT]) { + map->memsize = (last_port - first_port + 1) + * sizeof(unsigned long); + + if (!init_map_port(set, map, first_port, last_port)) { + kfree(map); + return -ENOMEM; + } + + map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + set->variant = &bitmap_tport; + + bitmap_port_gc_init(set); + } else { + map->memsize = bitmap_bytes(0, last_port - first_port); + pr_debug("memsize: %zu\n", map->memsize); + if (!init_map_port(set, map, first_port, 
last_port)) { + kfree(map); + return -ENOMEM; + } + + set->variant = &bitmap_port; + } + return 0; +} + +static struct ip_set_type bitmap_port_type = { + .name = "bitmap:port", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_PORT, + .dimension = IPSET_DIM_ONE, + .family = AF_UNSPEC, + .revision = 0, + .create = bitmap_port_create, + .create_policy = { + [IPSET_ATTR_PORT] = { .type = NLA_U16 }, + [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_PORT] = { .type = NLA_U16 }, + [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + }, + .me = THIS_MODULE, +}; + +static int __init +bitmap_port_init(void) +{ + return ip_set_type_register(&bitmap_port_type); +} + +static void __exit +bitmap_port_fini(void) +{ + ip_set_type_unregister(&bitmap_port_type); +} + +module_init(bitmap_port_init); +module_exit(bitmap_port_fini); -- cgit v1.1 From 6c027889696a7a694b0e2f6e3cabadefec7553b6 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:38:36 +0100 Subject: netfilter: ipset: hash:ip set type support The module implements the hash:ip type support in four flavours: for IPv4 or IPv6, both without and with timeout support. All the hash types are based on the "array hash" or ahash structure and functions as a good compromise between minimal memory footprint and speed. The hashing uses arrays to resolve clashes. The hash table is resized (doubled) when searching becomes too long. Resizing can be triggered by userspace add commands only and those are serialized by the nfnl mutex. During resizing the set is read-locked, so the only possible concurrent operations are the kernel side readers. Those are protected by RCU locking. 
Because of the four flavours and the other hash types, the functions are implemented in general forms in the ip_set_ahash.h header file and the real functions are generated before compiling by macro expansion. Thus the dereferencing of low-level functions and void pointer arguments could be avoided: the low-level functions are inlined, the function arguments are pointers of type-specific structures. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/ipset/Kconfig | 10 + net/netfilter/ipset/Makefile | 3 + net/netfilter/ipset/ip_set_hash_ip.c | 467 +++++++++++++++++++++++++++++++++++ 3 files changed, 480 insertions(+) create mode 100644 net/netfilter/ipset/ip_set_hash_ip.c (limited to 'net') diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index f401e91..194d89c 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -50,4 +50,14 @@ config IP_SET_BITMAP_PORT To compile it as a module, choose M here. If unsure, say N. +config IP_SET_HASH_IP + tristate "hash:ip set support" + depends on IP_SET + help + This option adds the hash:ip set type support, by which one + can store arbitrary IPv4 or IPv6 addresses (or network addresses) + in a set. + + To compile it as a module, choose M here. If unsure, say N. 
+ endif # IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index 40866e2..5cbf00c 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -11,3 +11,6 @@ obj-$(CONFIG_IP_SET) += ip_set.o obj-$(CONFIG_IP_SET_BITMAP_IP) += ip_set_bitmap_ip.o obj-$(CONFIG_IP_SET_BITMAP_IPMAC) += ip_set_bitmap_ipmac.o obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o + +# hash types +obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c new file mode 100644 index 0000000..53964bc --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -0,0 +1,467 @@ +/* Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the hash:ip type */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("hash:ip type of IP sets"); +MODULE_ALIAS("ip_set_hash:ip"); + +/* Type specific function prefix */ +#define TYPE hash_ip + +static bool +hash_ip_same_set(const struct ip_set *a, const struct ip_set *b); + +#define hash_ip4_same_set hash_ip_same_set +#define hash_ip6_same_set hash_ip_same_set + +/* The type variant functions: IPv4 */ + +/* Member elements without timeout */ +struct hash_ip4_elem { + __be32 ip; +}; + +/* Member elements with timeout support */ +struct hash_ip4_telem { + __be32 ip; + unsigned long timeout; +}; + +static inline bool +hash_ip4_data_equal(const struct hash_ip4_elem *ip1, + const struct hash_ip4_elem *ip2) +{ + return ip1->ip == ip2->ip; +} + +static inline bool +hash_ip4_data_isnull(const struct 
hash_ip4_elem *elem) +{ + return elem->ip == 0; +} + +static inline void +hash_ip4_data_copy(struct hash_ip4_elem *dst, const struct hash_ip4_elem *src) +{ + dst->ip = src->ip; +} + +/* Zero valued IP addresses cannot be stored */ +static inline void +hash_ip4_data_zero_out(struct hash_ip4_elem *elem) +{ + elem->ip = 0; +} + +static inline bool +hash_ip4_data_list(struct sk_buff *skb, const struct hash_ip4_elem *data) +{ + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_ip4_data_tlist(struct sk_buff *skb, const struct hash_ip4_elem *data) +{ + const struct hash_ip4_telem *tdata = + (const struct hash_ip4_telem *)data; + + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(tdata->timeout))); + + return 0; + +nla_put_failure: + return 1; +} + +#define IP_SET_HASH_WITH_NETMASK +#define PF 4 +#define HOST_MASK 32 +#include + +static int +hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + __be32 ip; + + ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &ip); + ip &= ip_set_netmask(h->netmask); + if (ip == 0) + return -EINVAL; + + return adtfn(set, &ip, h->timeout); +} + +static int +hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + u32 ip, ip_to, hosts, timeout = h->timeout; + __be32 nip; + int ret = 0; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); + if (ret) + return ret; + + ip &= ip_set_hostmask(h->netmask); + + if (tb[IPSET_ATTR_TIMEOUT]) { + 
if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + if (adt == IPSET_TEST) { + nip = htonl(ip); + if (nip == 0) + return -IPSET_ERR_HASH_ELEM; + return adtfn(set, &nip, timeout); + } + + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); + if (ret) + return ret; + if (ip > ip_to) + swap(ip, ip_to); + } else if (tb[IPSET_ATTR_CIDR]) { + u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (cidr > 32) + return -IPSET_ERR_INVALID_CIDR; + ip &= ip_set_hostmask(cidr); + ip_to = ip | ~ip_set_hostmask(cidr); + } else + ip_to = ip; + + hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1); + + for (; !before(ip_to, ip); ip += hosts) { + nip = htonl(ip); + if (nip == 0) + return -IPSET_ERR_HASH_ELEM; + ret = adtfn(set, &nip, timeout); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +static bool +hash_ip_same_set(const struct ip_set *a, const struct ip_set *b) +{ + const struct ip_set_hash *x = a->data; + const struct ip_set_hash *y = b->data; + + /* Resizing changes htable_bits, so we ignore it */ + return x->maxelem == y->maxelem && + x->timeout == y->timeout && + x->netmask == y->netmask; +} + +/* The type variant functions: IPv6 */ + +struct hash_ip6_elem { + union nf_inet_addr ip; +}; + +struct hash_ip6_telem { + union nf_inet_addr ip; + unsigned long timeout; +}; + +static inline bool +hash_ip6_data_equal(const struct hash_ip6_elem *ip1, + const struct hash_ip6_elem *ip2) +{ + return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0; +} + +static inline bool +hash_ip6_data_isnull(const struct hash_ip6_elem *elem) +{ + return ipv6_addr_any(&elem->ip.in6); +} + +static inline void +hash_ip6_data_copy(struct hash_ip6_elem *dst, const struct hash_ip6_elem *src) +{ + ipv6_addr_copy(&dst->ip.in6, &src->ip.in6); +} + +static inline void +hash_ip6_data_zero_out(struct hash_ip6_elem *elem) +{ + 
ipv6_addr_set(&elem->ip.in6, 0, 0, 0, 0); +} + +static inline void +ip6_netmask(union nf_inet_addr *ip, u8 prefix) +{ + ip->ip6[0] &= ip_set_netmask6(prefix)[0]; + ip->ip6[1] &= ip_set_netmask6(prefix)[1]; + ip->ip6[2] &= ip_set_netmask6(prefix)[2]; + ip->ip6[3] &= ip_set_netmask6(prefix)[3]; +} + +static bool +hash_ip6_data_list(struct sk_buff *skb, const struct hash_ip6_elem *data) +{ + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_ip6_data_tlist(struct sk_buff *skb, const struct hash_ip6_elem *data) +{ + const struct hash_ip6_telem *e = + (const struct hash_ip6_telem *)data; + + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(e->timeout))); + return 0; + +nla_put_failure: + return 1; +} + +#undef PF +#undef HOST_MASK + +#define PF 6 +#define HOST_MASK 128 +#include + +static int +hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + union nf_inet_addr ip; + + ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &ip.in6); + ip6_netmask(&ip, h->netmask); + if (ipv6_addr_any(&ip.in6)) + return -EINVAL; + + return adtfn(set, &ip, h->timeout); +} + +static const struct nla_policy hash_ip6_adt_policy[IPSET_ATTR_ADT_MAX + 1] = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, +}; + +static int +hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + union nf_inet_addr ip; + u32 timeout = h->timeout; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + tb[IPSET_ATTR_IP_TO] || + tb[IPSET_ATTR_CIDR])) + return -IPSET_ERR_PROTOCOL; 
+ + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &ip); + if (ret) + return ret; + + ip6_netmask(&ip, h->netmask); + if (ipv6_addr_any(&ip.in6)) + return -IPSET_ERR_HASH_ELEM; + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + ret = adtfn(set, &ip, timeout); + + return ip_set_eexist(ret, flags) ? 0 : ret; +} + +/* Create hash:ip type of sets */ + +static int +hash_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +{ + u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; + u8 netmask, hbits; + struct ip_set_hash *h; + + if (!(set->family == AF_INET || set->family == AF_INET6)) + return -IPSET_ERR_INVALID_FAMILY; + netmask = set->family == AF_INET ? 32 : 128; + pr_debug("Create set %s with family %s\n", + set->name, set->family == AF_INET ? "inet" : "inet6"); + + if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_HASHSIZE]) { + hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); + if (hashsize < IPSET_MIMINAL_HASHSIZE) + hashsize = IPSET_MIMINAL_HASHSIZE; + } + + if (tb[IPSET_ATTR_MAXELEM]) + maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); + + if (tb[IPSET_ATTR_NETMASK]) { + netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]); + + if ((set->family == AF_INET && netmask > 32) || + (set->family == AF_INET6 && netmask > 128) || + netmask == 0) + return -IPSET_ERR_INVALID_NETMASK; + } + + h = kzalloc(sizeof(*h), GFP_KERNEL); + if (!h) + return -ENOMEM; + + h->maxelem = maxelem; + h->netmask = netmask; + get_random_bytes(&h->initval, sizeof(h->initval)); + h->timeout = IPSET_NO_TIMEOUT; + + hbits = htable_bits(hashsize); + h->table = ip_set_alloc( + sizeof(struct htable) + + jhash_size(hbits) 
* sizeof(struct hbucket)); + if (!h->table) { + kfree(h); + return -ENOMEM; + } + h->table->htable_bits = hbits; + + set->data = h; + + if (tb[IPSET_ATTR_TIMEOUT]) { + h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + + set->variant = set->family == AF_INET + ? &hash_ip4_tvariant : &hash_ip6_tvariant; + + if (set->family == AF_INET) + hash_ip4_gc_init(set); + else + hash_ip6_gc_init(set); + } else { + set->variant = set->family == AF_INET + ? &hash_ip4_variant : &hash_ip6_variant; + } + + pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", + set->name, jhash_size(h->table->htable_bits), + h->table->htable_bits, h->maxelem, set->data, h->table); + + return 0; +} + +static struct ip_set_type hash_ip_type __read_mostly = { + .name = "hash:ip", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP, + .dimension = IPSET_DIM_ONE, + .family = AF_UNSPEC, + .revision = 0, + .create = hash_ip_create, + .create_policy = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_NETMASK] = { .type = NLA_U8 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + }, + .me = THIS_MODULE, +}; + +static int __init +hash_ip_init(void) +{ + return ip_set_type_register(&hash_ip_type); +} + +static void __exit +hash_ip_fini(void) +{ + ip_set_type_unregister(&hash_ip_type); +} + +module_init(hash_ip_init); +module_exit(hash_ip_fini); -- cgit v1.1 From 07896ed37b94599a1b8ea97f4bd5766be71390f4 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:39:52 +0100 Subject: netfilter: ipset: hash:ip,port set type support The module implements the hash:ip,port type support in four 
flavours: for IPv4 and IPv6, both without and with timeout support. The elements are two dimensional: IPv4/IPv6 address and protocol/port pairs. The port is interpreted for TCP, UDP, ICMP and ICMPv6 (for the latter two as type/code, of course). Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/ipset/Kconfig | 9 + net/netfilter/ipset/Makefile | 1 + net/netfilter/ipset/ip_set_hash_ipport.c | 547 +++++++++++++++++++++++++++++++ 3 files changed, 557 insertions(+) create mode 100644 net/netfilter/ipset/ip_set_hash_ipport.c (limited to 'net') diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index 194d89c..325b5312 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -60,4 +60,13 @@ config IP_SET_HASH_IP To compile it as a module, choose M here. If unsure, say N. +config IP_SET_HASH_IPPORT + tristate "hash:ip,port set support" + depends on IP_SET + help + This option adds the hash:ip,port set type support, by which one + can store IPv4/IPv6 address and protocol/port pairs. + + To compile it as a module, choose M here. If unsure, say N. + endif # IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index 5cbf00c..6a3663e 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -14,3 +14,4 @@ obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o # hash types obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o +obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c new file mode 100644 index 0000000..d9b1928 --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -0,0 +1,547 @@ +/* Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +/* Kernel module implementing an IP set type: the hash:ip,port type */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("hash:ip,port type of IP sets"); +MODULE_ALIAS("ip_set_hash:ip,port"); + +/* Type specific function prefix */ +#define TYPE hash_ipport + +static bool +hash_ipport_same_set(const struct ip_set *a, const struct ip_set *b); + +#define hash_ipport4_same_set hash_ipport_same_set +#define hash_ipport6_same_set hash_ipport_same_set + +/* The type variant functions: IPv4 */ + +/* Member elements without timeout */ +struct hash_ipport4_elem { + __be32 ip; + __be16 port; + u8 proto; + u8 padding; +}; + +/* Member elements with timeout support */ +struct hash_ipport4_telem { + __be32 ip; + __be16 port; + u8 proto; + u8 padding; + unsigned long timeout; +}; + +static inline bool +hash_ipport4_data_equal(const struct hash_ipport4_elem *ip1, + const struct hash_ipport4_elem *ip2) +{ + return ip1->ip == ip2->ip && + ip1->port == ip2->port && + ip1->proto == ip2->proto; +} + +static inline bool +hash_ipport4_data_isnull(const struct hash_ipport4_elem *elem) +{ + return elem->proto == 0; +} + +static inline void +hash_ipport4_data_copy(struct hash_ipport4_elem *dst, + const struct hash_ipport4_elem *src) +{ + dst->ip = src->ip; + dst->port = src->port; + dst->proto = src->proto; +} + +static inline void +hash_ipport4_data_zero_out(struct hash_ipport4_elem *elem) +{ + elem->proto = 0; +} + +static bool +hash_ipport4_data_list(struct sk_buff *skb, + const struct hash_ipport4_elem *data) +{ + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_ipport4_data_tlist(struct 
sk_buff *skb, + const struct hash_ipport4_elem *data) +{ + const struct hash_ipport4_telem *tdata = + (const struct hash_ipport4_telem *)data; + + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(tdata->timeout))); + + return 0; + +nla_put_failure: + return 1; +} + +#define PF 4 +#define HOST_MASK 32 +#include + +static int +hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipport4_elem data = { }; + + if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC, + &data.port, &data.proto)) + return -EINVAL; + + ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip); + + return adtfn(set, &data, h->timeout); +} + +static int +hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipport4_elem data = { }; + u32 ip, ip_to, p, port, port_to; + u32 timeout = h->timeout; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip); + if (ret) + return ret; + + if (tb[IPSET_ATTR_PORT]) + data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PROTO]) { + data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + + if (data.proto == 0) + return -IPSET_ERR_INVALID_PROTO; + } else + return -IPSET_ERR_MISSING_PROTO; + + switch (data.proto) { + case 
IPPROTO_UDP: + case IPPROTO_TCP: + case IPPROTO_ICMP: + break; + default: + data.port = 0; + break; + } + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + if (adt == IPSET_TEST || + !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || + !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || + tb[IPSET_ATTR_PORT_TO])) { + ret = adtfn(set, &data, timeout); + return ip_set_eexist(ret, flags) ? 0 : ret; + } + + ip = ntohl(data.ip); + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); + if (ret) + return ret; + if (ip > ip_to) + swap(ip, ip_to); + } else if (tb[IPSET_ATTR_CIDR]) { + u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (cidr > 32) + return -IPSET_ERR_INVALID_CIDR; + ip &= ip_set_hostmask(cidr); + ip_to = ip | ~ip_set_hostmask(cidr); + } else + ip_to = ip; + + port = ntohs(data.port); + if (tb[IPSET_ATTR_PORT_TO]) { + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) + swap(port, port_to); + } else + port_to = port; + + for (; !before(ip_to, ip); ip++) + for (p = port; p <= port_to; p++) { + data.ip = htonl(ip); + data.port = htons(p); + ret = adtfn(set, &data, timeout); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +static bool +hash_ipport_same_set(const struct ip_set *a, const struct ip_set *b) +{ + const struct ip_set_hash *x = a->data; + const struct ip_set_hash *y = b->data; + + /* Resizing changes htable_bits, so we ignore it */ + return x->maxelem == y->maxelem && + x->timeout == y->timeout; +} + +/* The type variant functions: IPv6 */ + +struct hash_ipport6_elem { + union nf_inet_addr ip; + __be16 port; + u8 proto; + u8 padding; +}; + +struct hash_ipport6_telem { + union nf_inet_addr ip; + __be16 port; + u8 proto; + u8 padding; + unsigned long timeout; +}; + +static inline bool +hash_ipport6_data_equal(const struct 
hash_ipport6_elem *ip1, + const struct hash_ipport6_elem *ip2) +{ + return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + ip1->port == ip2->port && + ip1->proto == ip2->proto; +} + +static inline bool +hash_ipport6_data_isnull(const struct hash_ipport6_elem *elem) +{ + return elem->proto == 0; +} + +static inline void +hash_ipport6_data_copy(struct hash_ipport6_elem *dst, + const struct hash_ipport6_elem *src) +{ + memcpy(dst, src, sizeof(*dst)); +} + +static inline void +hash_ipport6_data_zero_out(struct hash_ipport6_elem *elem) +{ + elem->proto = 0; +} + +static bool +hash_ipport6_data_list(struct sk_buff *skb, + const struct hash_ipport6_elem *data) +{ + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_ipport6_data_tlist(struct sk_buff *skb, + const struct hash_ipport6_elem *data) +{ + const struct hash_ipport6_telem *e = + (const struct hash_ipport6_telem *)data; + + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(e->timeout))); + return 0; + +nla_put_failure: + return 1; +} + +#undef PF +#undef HOST_MASK + +#define PF 6 +#define HOST_MASK 128 +#include + +static int +hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipport6_elem data = { }; + + if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC, + &data.port, &data.proto)) + return -EINVAL; + + ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6); + + return adtfn(set, &data, h->timeout); +} + +static int +hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 
flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipport6_elem data = { }; + u32 port, port_to; + u32 timeout = h->timeout; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + tb[IPSET_ATTR_IP_TO] || + tb[IPSET_ATTR_CIDR])) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip); + if (ret) + return ret; + + if (tb[IPSET_ATTR_PORT]) + data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PROTO]) { + data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + + if (data.proto == 0) + return -IPSET_ERR_INVALID_PROTO; + } else + return -IPSET_ERR_MISSING_PROTO; + + switch (data.proto) { + case IPPROTO_UDP: + case IPPROTO_TCP: + case IPPROTO_ICMPV6: + break; + default: + data.port = 0; + break; + } + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + if (adt == IPSET_TEST || + !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || + !tb[IPSET_ATTR_PORT_TO]) { + ret = adtfn(set, &data, timeout); + return ip_set_eexist(ret, flags) ? 
0 : ret; + } + + port = ntohs(data.port); + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) + swap(port, port_to); + + for (; port <= port_to; port++) { + data.port = htons(port); + ret = adtfn(set, &data, timeout); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +/* Create hash:ip type of sets */ + +static int +hash_ipport_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +{ + struct ip_set_hash *h; + u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; + u8 hbits; + + if (!(set->family == AF_INET || set->family == AF_INET6)) + return -IPSET_ERR_INVALID_FAMILY; + + if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_HASHSIZE]) { + hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); + if (hashsize < IPSET_MIMINAL_HASHSIZE) + hashsize = IPSET_MIMINAL_HASHSIZE; + } + + if (tb[IPSET_ATTR_MAXELEM]) + maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); + + h = kzalloc(sizeof(*h), GFP_KERNEL); + if (!h) + return -ENOMEM; + + h->maxelem = maxelem; + get_random_bytes(&h->initval, sizeof(h->initval)); + h->timeout = IPSET_NO_TIMEOUT; + + hbits = htable_bits(hashsize); + h->table = ip_set_alloc( + sizeof(struct htable) + + jhash_size(hbits) * sizeof(struct hbucket)); + if (!h->table) { + kfree(h); + return -ENOMEM; + } + h->table->htable_bits = hbits; + + set->data = h; + + if (tb[IPSET_ATTR_TIMEOUT]) { + h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + + set->variant = set->family == AF_INET + ? &hash_ipport4_tvariant : &hash_ipport6_tvariant; + + if (set->family == AF_INET) + hash_ipport4_gc_init(set); + else + hash_ipport6_gc_init(set); + } else { + set->variant = set->family == AF_INET + ? 
&hash_ipport4_variant : &hash_ipport6_variant; + } + + pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", + set->name, jhash_size(h->table->htable_bits), + h->table->htable_bits, h->maxelem, set->data, h->table); + + return 0; +} + +static struct ip_set_type hash_ipport_type __read_mostly = { + .name = "hash:ip,port", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_PORT, + .dimension = IPSET_DIM_TWO, + .family = AF_UNSPEC, + .revision = 0, + .create = hash_ipport_create, + .create_policy = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_PORT] = { .type = NLA_U16 }, + [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + }, + .me = THIS_MODULE, +}; + +static int __init +hash_ipport_init(void) +{ + return ip_set_type_register(&hash_ipport_type); +} + +static void __exit +hash_ipport_fini(void) +{ + ip_set_type_unregister(&hash_ipport_type); +} + +module_init(hash_ipport_init); +module_exit(hash_ipport_fini); -- cgit v1.1 From 5663bc30e6114b6ba88cc428619ede46a3246a7b Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:41:26 +0100 Subject: netfilter: ipset: hash:ip,port,ip set type support The module implements the hash:ip,port,ip type support in four flavours: for IPv4 and IPv6, both without and with timeout support. The elements are three dimensional: IPv4/IPv6 address, protocol/port and IPv4/IPv6 address triples. 
Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/ipset/Kconfig | 10 + net/netfilter/ipset/Makefile | 1 + net/netfilter/ipset/ip_set_hash_ipportip.c | 565 +++++++++++++++++++++++++++++ 3 files changed, 576 insertions(+) create mode 100644 net/netfilter/ipset/ip_set_hash_ipportip.c (limited to 'net') diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index 325b5312..e693553 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -69,4 +69,14 @@ config IP_SET_HASH_IPPORT To compile it as a module, choose M here. If unsure, say N. +config IP_SET_HASH_IPPORTIP + tristate "hash:ip,port,ip set support" + depends on IP_SET + help + This option adds the hash:ip,port,ip set type support, by which + one can store IPv4/IPv6 address, protocol/port, and IPv4/IPv6 + address triples in a set. + + To compile it as a module, choose M here. If unsure, say N. + endif # IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index 6a3663e..e9ddb25 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -15,3 +15,4 @@ obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o # hash types obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o +obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c new file mode 100644 index 0000000..80dae9d --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -0,0 +1,565 @@ +/* Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +/* Kernel module implementing an IP set type: the hash:ip,port,ip type */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("hash:ip,port,ip type of IP sets"); +MODULE_ALIAS("ip_set_hash:ip,port,ip"); + +/* Type specific function prefix */ +#define TYPE hash_ipportip + +static bool +hash_ipportip_same_set(const struct ip_set *a, const struct ip_set *b); + +#define hash_ipportip4_same_set hash_ipportip_same_set +#define hash_ipportip6_same_set hash_ipportip_same_set + +/* The type variant functions: IPv4 */ + +/* Member elements without timeout */ +struct hash_ipportip4_elem { + __be32 ip; + __be32 ip2; + __be16 port; + u8 proto; + u8 padding; +}; + +/* Member elements with timeout support */ +struct hash_ipportip4_telem { + __be32 ip; + __be32 ip2; + __be16 port; + u8 proto; + u8 padding; + unsigned long timeout; +}; + +static inline bool +hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1, + const struct hash_ipportip4_elem *ip2) +{ + return ip1->ip == ip2->ip && + ip1->ip2 == ip2->ip2 && + ip1->port == ip2->port && + ip1->proto == ip2->proto; +} + +static inline bool +hash_ipportip4_data_isnull(const struct hash_ipportip4_elem *elem) +{ + return elem->proto == 0; +} + +static inline void +hash_ipportip4_data_copy(struct hash_ipportip4_elem *dst, + const struct hash_ipportip4_elem *src) +{ + memcpy(dst, src, sizeof(*dst)); +} + +static inline void +hash_ipportip4_data_zero_out(struct hash_ipportip4_elem *elem) +{ + elem->proto = 0; +} + +static bool +hash_ipportip4_data_list(struct sk_buff *skb, + const struct hash_ipportip4_elem *data) +{ + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip); + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP2, data->ip2); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, 
IPSET_ATTR_PROTO, data->proto); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_ipportip4_data_tlist(struct sk_buff *skb, + const struct hash_ipportip4_elem *data) +{ + const struct hash_ipportip4_telem *tdata = + (const struct hash_ipportip4_telem *)data; + + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip); + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP2, tdata->ip2); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(tdata->timeout))); + + return 0; + +nla_put_failure: + return 1; +} + +#define PF 4 +#define HOST_MASK 32 +#include + +static int +hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportip4_elem data = { }; + + if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC, + &data.port, &data.proto)) + return -EINVAL; + + ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip); + ip4addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2); + + return adtfn(set, &data, h->timeout); +} + +static int +hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportip4_elem data = { }; + u32 ip, ip_to, p, port, port_to; + u32 timeout = h->timeout; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || + !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip); + if (ret) + return ret; + + ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP2], &data.ip2); + 
if (ret) + return ret; + + if (tb[IPSET_ATTR_PORT]) + data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PROTO]) { + data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + + if (data.proto == 0) + return -IPSET_ERR_INVALID_PROTO; + } else + return -IPSET_ERR_MISSING_PROTO; + + switch (data.proto) { + case IPPROTO_UDP: + case IPPROTO_TCP: + case IPPROTO_ICMP: + break; + default: + data.port = 0; + break; + } + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + if (adt == IPSET_TEST || + !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || + !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || + tb[IPSET_ATTR_PORT_TO])) { + ret = adtfn(set, &data, timeout); + return ip_set_eexist(ret, flags) ? 0 : ret; + } + + ip = ntohl(data.ip); + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); + if (ret) + return ret; + if (ip > ip_to) + swap(ip, ip_to); + } else if (tb[IPSET_ATTR_CIDR]) { + u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (cidr > 32) + return -IPSET_ERR_INVALID_CIDR; + ip &= ip_set_hostmask(cidr); + ip_to = ip | ~ip_set_hostmask(cidr); + } else + ip_to = ip; + + port = ntohs(data.port); + if (tb[IPSET_ATTR_PORT_TO]) { + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) + swap(port, port_to); + } else + port_to = port; + + for (; !before(ip_to, ip); ip++) + for (p = port; p <= port_to; p++) { + data.ip = htonl(ip); + data.port = htons(p); + ret = adtfn(set, &data, timeout); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +static bool +hash_ipportip_same_set(const struct ip_set *a, const struct ip_set *b) +{ + const struct ip_set_hash *x = a->data; + const struct ip_set_hash *y = b->data; + + /* Resizing changes htable_bits, so we ignore it */ + return x->maxelem == y->maxelem && + x->timeout 
== y->timeout; +} + +/* The type variant functions: IPv6 */ + +struct hash_ipportip6_elem { + union nf_inet_addr ip; + union nf_inet_addr ip2; + __be16 port; + u8 proto; + u8 padding; +}; + +struct hash_ipportip6_telem { + union nf_inet_addr ip; + union nf_inet_addr ip2; + __be16 port; + u8 proto; + u8 padding; + unsigned long timeout; +}; + +static inline bool +hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1, + const struct hash_ipportip6_elem *ip2) +{ + return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + ipv6_addr_cmp(&ip1->ip2.in6, &ip2->ip2.in6) == 0 && + ip1->port == ip2->port && + ip1->proto == ip2->proto; +} + +static inline bool +hash_ipportip6_data_isnull(const struct hash_ipportip6_elem *elem) +{ + return elem->proto == 0; +} + +static inline void +hash_ipportip6_data_copy(struct hash_ipportip6_elem *dst, + const struct hash_ipportip6_elem *src) +{ + memcpy(dst, src, sizeof(*dst)); +} + +static inline void +hash_ipportip6_data_zero_out(struct hash_ipportip6_elem *elem) +{ + elem->proto = 0; +} + +static bool +hash_ipportip6_data_list(struct sk_buff *skb, + const struct hash_ipportip6_elem *data) +{ + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip); + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP2, &data->ip2); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_ipportip6_data_tlist(struct sk_buff *skb, + const struct hash_ipportip6_elem *data) +{ + const struct hash_ipportip6_telem *e = + (const struct hash_ipportip6_telem *)data; + + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip); + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP2, &data->ip2); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(e->timeout))); + return 0; + +nla_put_failure: + return 1; +} + +#undef PF +#undef HOST_MASK + +#define PF 6 +#define HOST_MASK 128 
+#include + +static int +hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportip6_elem data = { }; + + if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC, + &data.port, &data.proto)) + return -EINVAL; + + ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6); + ip6addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2.in6); + + return adtfn(set, &data, h->timeout); +} + +static int +hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportip6_elem data = { }; + u32 port, port_to; + u32 timeout = h->timeout; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || + !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + tb[IPSET_ATTR_IP_TO] || + tb[IPSET_ATTR_CIDR])) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip); + if (ret) + return ret; + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &data.ip2); + if (ret) + return ret; + + if (tb[IPSET_ATTR_PORT]) + data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PROTO]) { + data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + + if (data.proto == 0) + return -IPSET_ERR_INVALID_PROTO; + } else + return -IPSET_ERR_MISSING_PROTO; + + switch (data.proto) { + case IPPROTO_UDP: + case IPPROTO_TCP: + case IPPROTO_ICMPV6: + break; + default: + data.port = 0; + break; + } + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = 
ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + if (adt == IPSET_TEST || + !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || + !tb[IPSET_ATTR_PORT_TO]) { + ret = adtfn(set, &data, timeout); + return ip_set_eexist(ret, flags) ? 0 : ret; + } + + port = ntohs(data.port); + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) + swap(port, port_to); + + for (; port <= port_to; port++) { + data.port = htons(port); + ret = adtfn(set, &data, timeout); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +/* Create hash:ip type of sets */ + +static int +hash_ipportip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +{ + struct ip_set_hash *h; + u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; + u8 hbits; + + if (!(set->family == AF_INET || set->family == AF_INET6)) + return -IPSET_ERR_INVALID_FAMILY; + + if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_HASHSIZE]) { + hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); + if (hashsize < IPSET_MIMINAL_HASHSIZE) + hashsize = IPSET_MIMINAL_HASHSIZE; + } + + if (tb[IPSET_ATTR_MAXELEM]) + maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); + + h = kzalloc(sizeof(*h), GFP_KERNEL); + if (!h) + return -ENOMEM; + + h->maxelem = maxelem; + get_random_bytes(&h->initval, sizeof(h->initval)); + h->timeout = IPSET_NO_TIMEOUT; + + hbits = htable_bits(hashsize); + h->table = ip_set_alloc( + sizeof(struct htable) + + jhash_size(hbits) * sizeof(struct hbucket)); + if (!h->table) { + kfree(h); + return -ENOMEM; + } + h->table->htable_bits = hbits; + + set->data = h; + + if (tb[IPSET_ATTR_TIMEOUT]) { + h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + + set->variant = set->family == AF_INET + ? 
&hash_ipportip4_tvariant : &hash_ipportip6_tvariant; + + if (set->family == AF_INET) + hash_ipportip4_gc_init(set); + else + hash_ipportip6_gc_init(set); + } else { + set->variant = set->family == AF_INET + ? &hash_ipportip4_variant : &hash_ipportip6_variant; + } + + pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", + set->name, jhash_size(h->table->htable_bits), + h->table->htable_bits, h->maxelem, set->data, h->table); + + return 0; +} + +static struct ip_set_type hash_ipportip_type __read_mostly = { + .name = "hash:ip,port,ip", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2, + .dimension = IPSET_DIM_THREE, + .family = AF_UNSPEC, + .revision = 0, + .create = hash_ipportip_create, + .create_policy = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP2] = { .type = NLA_NESTED }, + [IPSET_ATTR_PORT] = { .type = NLA_U16 }, + [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + }, + .me = THIS_MODULE, +}; + +static int __init +hash_ipportip_init(void) +{ + return ip_set_type_register(&hash_ipportip_type); +} + +static void __exit +hash_ipportip_fini(void) +{ + ip_set_type_unregister(&hash_ipportip_type); +} + +module_init(hash_ipportip_init); +module_exit(hash_ipportip_fini); -- cgit v1.1 From 41d22f7b2e48c77175b62cec3797d7d7173a626e Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:51:00 +0100 Subject: netfilter: ipset: hash:ip,port,net set type support The module implements the hash:ip,port,net type support in four 
flavours: for IPv4 and IPv6, both without and with timeout support. The elements are three dimensional: IPv4/IPv6 address, protocol/port and IPv4/IPv6 network address/prefix triples. The different prefixes are searched/matched from the longest prefix to the shortest one (most specific to least). In other words the processing time grows linearly with the number of different prefixes in the set. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/ipset/Kconfig | 10 + net/netfilter/ipset/Makefile | 1 + net/netfilter/ipset/ip_set_hash_ipportnet.c | 631 ++++++++++++++++++++++++++++ 3 files changed, 642 insertions(+) create mode 100644 net/netfilter/ipset/ip_set_hash_ipportnet.c (limited to 'net') diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index e693553..e2fbaa9 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -79,4 +79,14 @@ config IP_SET_HASH_IPPORTIP To compile it as a module, choose M here. If unsure, say N. +config IP_SET_HASH_IPPORTNET + tristate "hash:ip,port,net set support" + depends on IP_SET + help + This option adds the hash:ip,port,net set type support, by which + one can store IPv4/IPv6 address, protocol/port, and IPv4/IPv6 + network address/prefix triples in a set. + + To compile it as a module, choose M here. If unsure, say N.
+ endif # IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index e9ddb25..9c5d857 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -16,3 +16,4 @@ obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o +obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c new file mode 100644 index 0000000..8eacd8a --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -0,0 +1,631 @@ +/* Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the hash:ip,port,net type */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("hash:ip,port,net type of IP sets"); +MODULE_ALIAS("ip_set_hash:ip,port,net"); + +/* Type specific function prefix */ +#define TYPE hash_ipportnet + +static bool +hash_ipportnet_same_set(const struct ip_set *a, const struct ip_set *b); + +#define hash_ipportnet4_same_set hash_ipportnet_same_set +#define hash_ipportnet6_same_set hash_ipportnet_same_set + +/* The type variant functions: IPv4 */ + +/* Member elements without timeout */ +struct hash_ipportnet4_elem { + __be32 ip; + __be32 ip2; + __be16 port; + u8 cidr; + u8 proto; +}; + +/* Member elements with timeout support */ +struct hash_ipportnet4_telem { + __be32 ip; + __be32 ip2; + __be16 port; + u8 cidr; + u8 
proto; + unsigned long timeout; +}; + +static inline bool +hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1, + const struct hash_ipportnet4_elem *ip2) +{ + return ip1->ip == ip2->ip && + ip1->ip2 == ip2->ip2 && + ip1->cidr == ip2->cidr && + ip1->port == ip2->port && + ip1->proto == ip2->proto; +} + +static inline bool +hash_ipportnet4_data_isnull(const struct hash_ipportnet4_elem *elem) +{ + return elem->proto == 0; +} + +static inline void +hash_ipportnet4_data_copy(struct hash_ipportnet4_elem *dst, + const struct hash_ipportnet4_elem *src) +{ + memcpy(dst, src, sizeof(*dst)); +} + +static inline void +hash_ipportnet4_data_netmask(struct hash_ipportnet4_elem *elem, u8 cidr) +{ + elem->ip2 &= ip_set_netmask(cidr); + elem->cidr = cidr; +} + +static inline void +hash_ipportnet4_data_zero_out(struct hash_ipportnet4_elem *elem) +{ + elem->proto = 0; +} + +static bool +hash_ipportnet4_data_list(struct sk_buff *skb, + const struct hash_ipportnet4_elem *data) +{ + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip); + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP2, data->ip2); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, data->cidr); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_ipportnet4_data_tlist(struct sk_buff *skb, + const struct hash_ipportnet4_elem *data) +{ + const struct hash_ipportnet4_telem *tdata = + (const struct hash_ipportnet4_telem *)data; + + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip); + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP2, tdata->ip2); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, data->cidr); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(tdata->timeout))); + + return 0; + +nla_put_failure: + return 1; +} + +#define IP_SET_HASH_WITH_PROTO +#define IP_SET_HASH_WITH_NETS + +#define PF 4 +#define HOST_MASK 32 +#include + +static 
int +hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + /* Use h->nets[0].cidr when it is non-zero, otherwise HOST_MASK; + * a logical "||" here would only ever yield 0 or 1. + */ + struct hash_ipportnet4_elem data = { + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; + + if (data.cidr == 0) + return -EINVAL; + if (adt == IPSET_TEST) + data.cidr = HOST_MASK; + + if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC, + &data.port, &data.proto)) + return -EINVAL; + + ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip); + ip4addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2); + data.ip2 &= ip_set_netmask(data.cidr); + + return adtfn(set, &data, h->timeout); +} + +static int +hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportnet4_elem data = { .cidr = HOST_MASK }; + u32 ip, ip_to, p, port, port_to; + u32 timeout = h->timeout; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || + !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip); + if (ret) + return ret; + + ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP2], &data.ip2); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR2]) + data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]); + + if (!data.cidr) + return -IPSET_ERR_INVALID_CIDR; + + data.ip2 &= ip_set_netmask(data.cidr); + + if (tb[IPSET_ATTR_PORT]) + data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PROTO]) { + data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + + if (data.proto == 0) + return -IPSET_ERR_INVALID_PROTO; + } else + return
-IPSET_ERR_MISSING_PROTO; + + switch (data.proto) { + case IPPROTO_UDP: + case IPPROTO_TCP: + case IPPROTO_ICMP: + break; + default: + data.port = 0; + break; + } + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + if (adt == IPSET_TEST || + !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || + !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || + tb[IPSET_ATTR_PORT_TO])) { + ret = adtfn(set, &data, timeout); + return ip_set_eexist(ret, flags) ? 0 : ret; + } + + ip = ntohl(data.ip); + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); + if (ret) + return ret; + if (ip > ip_to) + swap(ip, ip_to); + } else if (tb[IPSET_ATTR_CIDR]) { + u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (cidr > 32) + return -IPSET_ERR_INVALID_CIDR; + ip &= ip_set_hostmask(cidr); + ip_to = ip | ~ip_set_hostmask(cidr); + } else + ip_to = ip; + + port = ntohs(data.port); + if (tb[IPSET_ATTR_PORT_TO]) { + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) + swap(port, port_to); + } else + port_to = port; + + for (; !before(ip_to, ip); ip++) + for (p = port; p <= port_to; p++) { + data.ip = htonl(ip); + data.port = htons(p); + ret = adtfn(set, &data, timeout); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +static bool +hash_ipportnet_same_set(const struct ip_set *a, const struct ip_set *b) +{ + const struct ip_set_hash *x = a->data; + const struct ip_set_hash *y = b->data; + + /* Resizing changes htable_bits, so we ignore it */ + return x->maxelem == y->maxelem && + x->timeout == y->timeout; +} + +/* The type variant functions: IPv6 */ + +struct hash_ipportnet6_elem { + union nf_inet_addr ip; + union nf_inet_addr ip2; + __be16 port; + u8 cidr; + u8 proto; +}; + +struct hash_ipportnet6_telem { + union nf_inet_addr ip; + union nf_inet_addr ip2; + __be16 port; + u8 cidr; 
+ u8 proto; + unsigned long timeout; +}; + +static inline bool +hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1, + const struct hash_ipportnet6_elem *ip2) +{ + return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + ipv6_addr_cmp(&ip1->ip2.in6, &ip2->ip2.in6) == 0 && + ip1->cidr == ip2->cidr && + ip1->port == ip2->port && + ip1->proto == ip2->proto; +} + +static inline bool +hash_ipportnet6_data_isnull(const struct hash_ipportnet6_elem *elem) +{ + return elem->proto == 0; +} + +static inline void +hash_ipportnet6_data_copy(struct hash_ipportnet6_elem *dst, + const struct hash_ipportnet6_elem *src) +{ + memcpy(dst, src, sizeof(*dst)); +} + +static inline void +hash_ipportnet6_data_zero_out(struct hash_ipportnet6_elem *elem) +{ + elem->proto = 0; +} + +static inline void +ip6_netmask(union nf_inet_addr *ip, u8 prefix) +{ + ip->ip6[0] &= ip_set_netmask6(prefix)[0]; + ip->ip6[1] &= ip_set_netmask6(prefix)[1]; + ip->ip6[2] &= ip_set_netmask6(prefix)[2]; + ip->ip6[3] &= ip_set_netmask6(prefix)[3]; +} + +static inline void +hash_ipportnet6_data_netmask(struct hash_ipportnet6_elem *elem, u8 cidr) +{ + ip6_netmask(&elem->ip2, cidr); + elem->cidr = cidr; +} + +static bool +hash_ipportnet6_data_list(struct sk_buff *skb, + const struct hash_ipportnet6_elem *data) +{ + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip); + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP2, &data->ip2); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, data->cidr); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_ipportnet6_data_tlist(struct sk_buff *skb, + const struct hash_ipportnet6_elem *data) +{ + const struct hash_ipportnet6_telem *e = + (const struct hash_ipportnet6_telem *)data; + + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip); + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP2, &data->ip2); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR2, data->cidr); + 
NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(e->timeout))); + return 0; + +nla_put_failure: + return 1; +} + +#undef PF +#undef HOST_MASK + +#define PF 6 +#define HOST_MASK 128 +#include + +static int +hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + /* Use h->nets[0].cidr when it is non-zero, otherwise HOST_MASK; + * a logical "||" here would only ever yield 0 or 1. + */ + struct hash_ipportnet6_elem data = { + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; + + if (data.cidr == 0) + return -EINVAL; + if (adt == IPSET_TEST) + data.cidr = HOST_MASK; + + if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC, + &data.port, &data.proto)) + return -EINVAL; + + ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6); + ip6addrptr(skb, flags & IPSET_DIM_THREE_SRC, &data.ip2.in6); + ip6_netmask(&data.ip2, data.cidr); + + return adtfn(set, &data, h->timeout); +} + +static int +hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportnet6_elem data = { .cidr = HOST_MASK }; + u32 port, port_to; + u32 timeout = h->timeout; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || + !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + tb[IPSET_ATTR_IP_TO] || + tb[IPSET_ATTR_CIDR])) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip); + if (ret) + return ret; + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &data.ip2); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR2]) + data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]); + + if (!data.cidr) + return -IPSET_ERR_INVALID_CIDR; + +
ip6_netmask(&data.ip2, data.cidr); + + if (tb[IPSET_ATTR_PORT]) + data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PROTO]) { + data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + + if (data.proto == 0) + return -IPSET_ERR_INVALID_PROTO; + } else + return -IPSET_ERR_MISSING_PROTO; + + switch (data.proto) { + case IPPROTO_UDP: + case IPPROTO_TCP: + case IPPROTO_ICMPV6: + break; + default: + data.port = 0; + break; + } + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + if (adt == IPSET_TEST || + !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || + !tb[IPSET_ATTR_PORT_TO]) { + ret = adtfn(set, &data, timeout); + return ip_set_eexist(ret, flags) ? 0 : ret; + } + + port = ntohs(data.port); + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) + swap(port, port_to); + + for (; port <= port_to; port++) { + data.port = htons(port); + ret = adtfn(set, &data, timeout); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +/* Create hash:ip type of sets */ + +static int +hash_ipportnet_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +{ + struct ip_set_hash *h; + u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; + u8 hbits; + + if (!(set->family == AF_INET || set->family == AF_INET6)) + return -IPSET_ERR_INVALID_FAMILY; + + if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_HASHSIZE]) { + hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); + if (hashsize < IPSET_MIMINAL_HASHSIZE) + hashsize = IPSET_MIMINAL_HASHSIZE; + } + + if (tb[IPSET_ATTR_MAXELEM]) + maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); + + h = kzalloc(sizeof(*h) + + 
sizeof(struct ip_set_hash_nets) + * (set->family == AF_INET ? 32 : 128), GFP_KERNEL); + if (!h) + return -ENOMEM; + + h->maxelem = maxelem; + get_random_bytes(&h->initval, sizeof(h->initval)); + h->timeout = IPSET_NO_TIMEOUT; + + hbits = htable_bits(hashsize); + h->table = ip_set_alloc( + sizeof(struct htable) + + jhash_size(hbits) * sizeof(struct hbucket)); + if (!h->table) { + kfree(h); + return -ENOMEM; + } + h->table->htable_bits = hbits; + + set->data = h; + + if (tb[IPSET_ATTR_TIMEOUT]) { + h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + + set->variant = set->family == AF_INET + ? &hash_ipportnet4_tvariant + : &hash_ipportnet6_tvariant; + + if (set->family == AF_INET) + hash_ipportnet4_gc_init(set); + else + hash_ipportnet6_gc_init(set); + } else { + set->variant = set->family == AF_INET + ? &hash_ipportnet4_variant : &hash_ipportnet6_variant; + } + + pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", + set->name, jhash_size(h->table->htable_bits), + h->table->htable_bits, h->maxelem, set->data, h->table); + + return 0; +} + +static struct ip_set_type hash_ipportnet_type __read_mostly = { + .name = "hash:ip,port,net", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2, + .dimension = IPSET_DIM_THREE, + .family = AF_UNSPEC, + .revision = 0, + .create = hash_ipportnet_create, + .create_policy = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP2] = { .type = NLA_NESTED }, + [IPSET_ATTR_PORT] = { .type = NLA_U16 }, + [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_CIDR2] = { .type = NLA_U8 }, + [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, + 
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + }, + .me = THIS_MODULE, +}; + +static int __init +hash_ipportnet_init(void) +{ + return ip_set_type_register(&hash_ipportnet_type); +} + +static void __exit +hash_ipportnet_fini(void) +{ + ip_set_type_unregister(&hash_ipportnet_type); +} + +module_init(hash_ipportnet_init); +module_exit(hash_ipportnet_fini); -- cgit v1.1 From b38370299eeaba4cf8a9e0c5c6acc2a1e049be23 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:52:54 +0100 Subject: netfilter: ipset: hash:net set type support The module implements the hash:net type support in four flavours: for IPv4 and IPv6, both without and with timeout support. The elements are one dimensional: IPv4/IPv6 network address/prefixes. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/ipset/Kconfig | 9 + net/netfilter/ipset/Makefile | 1 + net/netfilter/ipset/ip_set_hash_net.c | 461 ++++++++++++++++++++++++++++++++++ 3 files changed, 471 insertions(+) create mode 100644 net/netfilter/ipset/ip_set_hash_net.c (limited to 'net') diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index e2fbaa9..8d85de0 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -89,4 +89,13 @@ config IP_SET_HASH_IPPORTNET To compile it as a module, choose M here. If unsure, say N. +config IP_SET_HASH_NET + tristate "hash:net set support" + depends on IP_SET + help + This option adds the hash:net set type support, by which + one can store IPv4/IPv6 network address/prefix elements in a set. + + To compile it as a module, choose M here. If unsure, say N. 
+ endif # IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index 9c5d857..fd5eeb6 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -17,3 +17,4 @@ obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o +obj-$(CONFIG_IP_SET_HASH_NET) += ip_set_hash_net.o diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c new file mode 100644 index 0000000..fb0e6a6 --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -0,0 +1,461 @@ +/* Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the hash:net type */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("hash:net type of IP sets"); +MODULE_ALIAS("ip_set_hash:net"); + +/* Type specific function prefix */ +#define TYPE hash_net + +static bool +hash_net_same_set(const struct ip_set *a, const struct ip_set *b); + +#define hash_net4_same_set hash_net_same_set +#define hash_net6_same_set hash_net_same_set + +/* The type variant functions: IPv4 */ + +/* Member elements without timeout */ +struct hash_net4_elem { + __be32 ip; + u16 padding0; + u8 padding1; + u8 cidr; +}; + +/* Member elements with timeout support */ +struct hash_net4_telem { + __be32 ip; + u16 padding0; + u8 padding1; + u8 cidr; + unsigned long timeout; +}; + +static inline bool +hash_net4_data_equal(const struct hash_net4_elem *ip1, + const struct 
hash_net4_elem *ip2) +{ + return ip1->ip == ip2->ip && ip1->cidr == ip2->cidr; +} + +static inline bool +hash_net4_data_isnull(const struct hash_net4_elem *elem) +{ + return elem->cidr == 0; +} + +static inline void +hash_net4_data_copy(struct hash_net4_elem *dst, + const struct hash_net4_elem *src) +{ + dst->ip = src->ip; + dst->cidr = src->cidr; +} + +static inline void +hash_net4_data_netmask(struct hash_net4_elem *elem, u8 cidr) +{ + elem->ip &= ip_set_netmask(cidr); + elem->cidr = cidr; +} + +/* Zero CIDR values cannot be stored */ +static inline void +hash_net4_data_zero_out(struct hash_net4_elem *elem) +{ + elem->cidr = 0; +} + +static bool +hash_net4_data_list(struct sk_buff *skb, const struct hash_net4_elem *data) +{ + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_net4_data_tlist(struct sk_buff *skb, const struct hash_net4_elem *data) +{ + const struct hash_net4_telem *tdata = + (const struct hash_net4_telem *)data; + + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, tdata->cidr); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(tdata->timeout))); + + return 0; + +nla_put_failure: + return 1; +} + +#define IP_SET_HASH_WITH_NETS + +#define PF 4 +#define HOST_MASK 32 +#include + +static int +hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + /* Use h->nets[0].cidr when it is non-zero, otherwise HOST_MASK; + * a logical "||" here would only ever yield 0 or 1. + */ + struct hash_net4_elem data = { + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; + + if (data.cidr == 0) + return -EINVAL; + if (adt == IPSET_TEST) + data.cidr = HOST_MASK; + + ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip); + data.ip &= ip_set_netmask(data.cidr); + + return adtfn(set, &data, h->timeout); +} + +static int +hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32
*lineno, u32 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_net4_elem data = { .cidr = HOST_MASK }; + u32 timeout = h->timeout; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) + data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (!data.cidr) + return -IPSET_ERR_INVALID_CIDR; + + data.ip &= ip_set_netmask(data.cidr); + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + ret = adtfn(set, &data, timeout); + + return ip_set_eexist(ret, flags) ? 0 : ret; +} + +static bool +hash_net_same_set(const struct ip_set *a, const struct ip_set *b) +{ + const struct ip_set_hash *x = a->data; + const struct ip_set_hash *y = b->data; + + /* Resizing changes htable_bits, so we ignore it */ + return x->maxelem == y->maxelem && + x->timeout == y->timeout; +} + +/* The type variant functions: IPv6 */ + +struct hash_net6_elem { + union nf_inet_addr ip; + u16 padding0; + u8 padding1; + u8 cidr; +}; + +struct hash_net6_telem { + union nf_inet_addr ip; + u16 padding0; + u8 padding1; + u8 cidr; + unsigned long timeout; +}; + +static inline bool +hash_net6_data_equal(const struct hash_net6_elem *ip1, + const struct hash_net6_elem *ip2) +{ + return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + ip1->cidr == ip2->cidr; +} + +static inline bool +hash_net6_data_isnull(const struct hash_net6_elem *elem) +{ + return elem->cidr == 0; +} + +static inline void +hash_net6_data_copy(struct hash_net6_elem *dst, + const struct hash_net6_elem *src) +{ + ipv6_addr_copy(&dst->ip.in6, &src->ip.in6); + dst->cidr = src->cidr; +} + +static inline void 
+hash_net6_data_zero_out(struct hash_net6_elem *elem) +{ + elem->cidr = 0; +} + +static inline void +ip6_netmask(union nf_inet_addr *ip, u8 prefix) +{ + ip->ip6[0] &= ip_set_netmask6(prefix)[0]; + ip->ip6[1] &= ip_set_netmask6(prefix)[1]; + ip->ip6[2] &= ip_set_netmask6(prefix)[2]; + ip->ip6[3] &= ip_set_netmask6(prefix)[3]; +} + +static inline void +hash_net6_data_netmask(struct hash_net6_elem *elem, u8 cidr) +{ + ip6_netmask(&elem->ip, cidr); + elem->cidr = cidr; +} + +static bool +hash_net6_data_list(struct sk_buff *skb, const struct hash_net6_elem *data) +{ + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_net6_data_tlist(struct sk_buff *skb, const struct hash_net6_elem *data) +{ + const struct hash_net6_telem *e = + (const struct hash_net6_telem *)data; + + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, e->cidr); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(e->timeout))); + return 0; + +nla_put_failure: + return 1; +} + +#undef PF +#undef HOST_MASK + +#define PF 6 +#define HOST_MASK 128 +#include + +static int +hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + /* Use h->nets[0].cidr when it is non-zero, otherwise HOST_MASK; + * a logical "||" here would only ever yield 0 or 1. + */ + struct hash_net6_elem data = { + .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK + }; + + if (data.cidr == 0) + return -EINVAL; + if (adt == IPSET_TEST) + data.cidr = HOST_MASK; + + ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6); + ip6_netmask(&data.ip, data.cidr); + + return adtfn(set, &data, h->timeout); +} + +static int +hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_net6_elem data = { .cidr = HOST_MASK }; + u32 timeout =
h->timeout; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) + data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (!data.cidr) + return -IPSET_ERR_INVALID_CIDR; + + ip6_netmask(&data.ip, data.cidr); + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + ret = adtfn(set, &data, timeout); + + return ip_set_eexist(ret, flags) ? 0 : ret; +} + +/* Create hash:ip type of sets */ + +static int +hash_net_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +{ + u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; + struct ip_set_hash *h; + u8 hbits; + + if (!(set->family == AF_INET || set->family == AF_INET6)) + return -IPSET_ERR_INVALID_FAMILY; + + if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_HASHSIZE]) { + hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); + if (hashsize < IPSET_MIMINAL_HASHSIZE) + hashsize = IPSET_MIMINAL_HASHSIZE; + } + + if (tb[IPSET_ATTR_MAXELEM]) + maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); + + h = kzalloc(sizeof(*h) + + sizeof(struct ip_set_hash_nets) + * (set->family == AF_INET ? 
32 : 128), GFP_KERNEL); + if (!h) + return -ENOMEM; + + h->maxelem = maxelem; + get_random_bytes(&h->initval, sizeof(h->initval)); + h->timeout = IPSET_NO_TIMEOUT; + + hbits = htable_bits(hashsize); + h->table = ip_set_alloc( + sizeof(struct htable) + + jhash_size(hbits) * sizeof(struct hbucket)); + if (!h->table) { + kfree(h); + return -ENOMEM; + } + h->table->htable_bits = hbits; + + set->data = h; + + if (tb[IPSET_ATTR_TIMEOUT]) { + h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + + set->variant = set->family == AF_INET + ? &hash_net4_tvariant : &hash_net6_tvariant; + + if (set->family == AF_INET) + hash_net4_gc_init(set); + else + hash_net6_gc_init(set); + } else { + set->variant = set->family == AF_INET + ? &hash_net4_variant : &hash_net6_variant; + } + + pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", + set->name, jhash_size(h->table->htable_bits), + h->table->htable_bits, h->maxelem, set->data, h->table); + + return 0; +} + +static struct ip_set_type hash_net_type __read_mostly = { + .name = "hash:net", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP, + .dimension = IPSET_DIM_ONE, + .family = AF_UNSPEC, + .revision = 0, + .create = hash_net_create, + .create_policy = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + }, + .me = THIS_MODULE, +}; + +static int __init +hash_net_init(void) +{ + return ip_set_type_register(&hash_net_type); +} + +static void __exit +hash_net_fini(void) +{ + ip_set_type_unregister(&hash_net_type); +} + +module_init(hash_net_init); +module_exit(hash_net_fini); -- cgit v1.1 From 21f45020a3084f80fcdd5f056a0c6389f5406399 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: 
Tue, 1 Feb 2011 15:53:55 +0100 Subject: netfilter: ipset: hash:net,port set type support The module implements the hash:net,port type support in four flavours: for IPv4 and IPv6, both without and with timeout support. The elements are two dimensional: IPv4/IPv6 network address/prefix and protocol/port pairs. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/ipset/Kconfig | 10 + net/netfilter/ipset/Makefile | 1 + net/netfilter/ipset/ip_set_hash_netport.c | 581 ++++++++++++++++++++++++++++++ 3 files changed, 592 insertions(+) create mode 100644 net/netfilter/ipset/ip_set_hash_netport.c (limited to 'net') diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index 8d85de0..2512e7b 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -98,4 +98,14 @@ config IP_SET_HASH_NET To compile it as a module, choose M here. If unsure, say N. +config IP_SET_HASH_NETPORT + tristate "hash:net,port set support" + depends on IP_SET + help + This option adds the hash:net,port set type support, by which + one can store IPv4/IPv6 network address/prefix and + protocol/port pairs as elements in a set. + + To compile it as a module, choose M here. If unsure, say N. 
+ endif # IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index fd5eeb6..fbbebd6 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -18,3 +18,4 @@ obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o obj-$(CONFIG_IP_SET_HASH_NET) += ip_set_hash_net.o +obj-$(CONFIG_IP_SET_HASH_NETPORT) += ip_set_hash_netport.o diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c new file mode 100644 index 0000000..342250f --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -0,0 +1,581 @@ +/* Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the hash:net,port type */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("hash:net,port type of IP sets"); +MODULE_ALIAS("ip_set_hash:net,port"); + +/* Type specific function prefix */ +#define TYPE hash_netport + +static bool +hash_netport_same_set(const struct ip_set *a, const struct ip_set *b); + +#define hash_netport4_same_set hash_netport_same_set +#define hash_netport6_same_set hash_netport_same_set + +/* The type variant functions: IPv4 */ + +/* Member elements without timeout */ +struct hash_netport4_elem { + __be32 ip; + __be16 port; + u8 proto; + u8 cidr; +}; + +/* Member elements with timeout support */ +struct hash_netport4_telem { + __be32 ip; + __be16 port; + u8 proto; + u8 cidr; + unsigned long timeout; +}; + +static inline bool 
+hash_netport4_data_equal(const struct hash_netport4_elem *ip1, + const struct hash_netport4_elem *ip2) +{ + return ip1->ip == ip2->ip && + ip1->port == ip2->port && + ip1->proto == ip2->proto && + ip1->cidr == ip2->cidr; +} + +static inline bool +hash_netport4_data_isnull(const struct hash_netport4_elem *elem) +{ + return elem->proto == 0; +} + +static inline void +hash_netport4_data_copy(struct hash_netport4_elem *dst, + const struct hash_netport4_elem *src) +{ + dst->ip = src->ip; + dst->port = src->port; + dst->proto = src->proto; + dst->cidr = src->cidr; +} + +static inline void +hash_netport4_data_netmask(struct hash_netport4_elem *elem, u8 cidr) +{ + elem->ip &= ip_set_netmask(cidr); + elem->cidr = cidr; +} + +static inline void +hash_netport4_data_zero_out(struct hash_netport4_elem *elem) +{ + elem->proto = 0; +} + +static bool +hash_netport4_data_list(struct sk_buff *skb, + const struct hash_netport4_elem *data) +{ + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, data->ip); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_netport4_data_tlist(struct sk_buff *skb, + const struct hash_netport4_elem *data) +{ + const struct hash_netport4_telem *tdata = + (const struct hash_netport4_telem *)data; + + NLA_PUT_IPADDR4(skb, IPSET_ATTR_IP, tdata->ip); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, tdata->port); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(tdata->timeout))); + + return 0; + +nla_put_failure: + return 1; +} + +#define IP_SET_HASH_WITH_PROTO +#define IP_SET_HASH_WITH_NETS + +#define PF 4 +#define HOST_MASK 32 +#include + +static int +hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + const struct ip_set_hash *h = 
set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netport4_elem data = { + .cidr = h->nets[0].cidr || HOST_MASK }; + + if (data.cidr == 0) + return -EINVAL; + if (adt == IPSET_TEST) + data.cidr = HOST_MASK; + + if (!ip_set_get_ip4_port(skb, flags & IPSET_DIM_TWO_SRC, + &data.port, &data.proto)) + return -EINVAL; + + ip4addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip); + data.ip &= ip_set_netmask(data.cidr); + + return adtfn(set, &data, h->timeout); +} + +static int +hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netport4_elem data = { .cidr = HOST_MASK }; + u32 port, port_to; + u32 timeout = h->timeout; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) + data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + if (!data.cidr) + return -IPSET_ERR_INVALID_CIDR; + data.ip &= ip_set_netmask(data.cidr); + + if (tb[IPSET_ATTR_PORT]) + data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PROTO]) { + data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + + if (data.proto == 0) + return -IPSET_ERR_INVALID_PROTO; + } else + return -IPSET_ERR_MISSING_PROTO; + + switch (data.proto) { + case IPPROTO_UDP: + case IPPROTO_TCP: + case IPPROTO_ICMP: + break; + default: + data.port = 0; + break; + } + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + if (adt == IPSET_TEST || + 
!(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || + !tb[IPSET_ATTR_PORT_TO]) { + ret = adtfn(set, &data, timeout); + return ip_set_eexist(ret, flags) ? 0 : ret; + } + + port = ntohs(data.port); + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) + swap(port, port_to); + + for (; port <= port_to; port++) { + data.port = htons(port); + ret = adtfn(set, &data, timeout); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +static bool +hash_netport_same_set(const struct ip_set *a, const struct ip_set *b) +{ + const struct ip_set_hash *x = a->data; + const struct ip_set_hash *y = b->data; + + /* Resizing changes htable_bits, so we ignore it */ + return x->maxelem == y->maxelem && + x->timeout == y->timeout; +} + +/* The type variant functions: IPv6 */ + +struct hash_netport6_elem { + union nf_inet_addr ip; + __be16 port; + u8 proto; + u8 cidr; +}; + +struct hash_netport6_telem { + union nf_inet_addr ip; + __be16 port; + u8 proto; + u8 cidr; + unsigned long timeout; +}; + +static inline bool +hash_netport6_data_equal(const struct hash_netport6_elem *ip1, + const struct hash_netport6_elem *ip2) +{ + return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 && + ip1->port == ip2->port && + ip1->proto == ip2->proto && + ip1->cidr == ip2->cidr; +} + +static inline bool +hash_netport6_data_isnull(const struct hash_netport6_elem *elem) +{ + return elem->proto == 0; +} + +static inline void +hash_netport6_data_copy(struct hash_netport6_elem *dst, + const struct hash_netport6_elem *src) +{ + memcpy(dst, src, sizeof(*dst)); +} + +static inline void +hash_netport6_data_zero_out(struct hash_netport6_elem *elem) +{ + elem->proto = 0; +} + +static inline void +ip6_netmask(union nf_inet_addr *ip, u8 prefix) +{ + ip->ip6[0] &= ip_set_netmask6(prefix)[0]; + ip->ip6[1] &= ip_set_netmask6(prefix)[1]; + ip->ip6[2] &= ip_set_netmask6(prefix)[2]; + ip->ip6[3] &= ip_set_netmask6(prefix)[3]; +} + +static inline void 
+hash_netport6_data_netmask(struct hash_netport6_elem *elem, u8 cidr) +{ + ip6_netmask(&elem->ip, cidr); + elem->cidr = cidr; +} + +static bool +hash_netport6_data_list(struct sk_buff *skb, + const struct hash_netport6_elem *data) +{ + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &data->ip); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + return 0; + +nla_put_failure: + return 1; +} + +static bool +hash_netport6_data_tlist(struct sk_buff *skb, + const struct hash_netport6_elem *data) +{ + const struct hash_netport6_telem *e = + (const struct hash_netport6_telem *)data; + + NLA_PUT_IPADDR6(skb, IPSET_ATTR_IP, &e->ip); + NLA_PUT_NET16(skb, IPSET_ATTR_PORT, data->port); + NLA_PUT_U8(skb, IPSET_ATTR_CIDR, data->cidr); + NLA_PUT_U8(skb, IPSET_ATTR_PROTO, data->proto); + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(e->timeout))); + return 0; + +nla_put_failure: + return 1; +} + +#undef PF +#undef HOST_MASK + +#define PF 6 +#define HOST_MASK 128 +#include + +static int +hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netport6_elem data = { + .cidr = h->nets[0].cidr || HOST_MASK }; + + if (data.cidr == 0) + return -EINVAL; + if (adt == IPSET_TEST) + data.cidr = HOST_MASK; + + if (!ip_set_get_ip6_port(skb, flags & IPSET_DIM_TWO_SRC, + &data.port, &data.proto)) + return -EINVAL; + + ip6addrptr(skb, flags & IPSET_DIM_ONE_SRC, &data.ip.in6); + ip6_netmask(&data.ip, data.cidr); + + return adtfn(set, &data, h->timeout); +} + +static int +hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + const struct ip_set_hash *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netport6_elem data = { .cidr = HOST_MASK }; + u32 
port, port_to; + u32 timeout = h->timeout; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || + !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) + data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + if (!data.cidr) + return -IPSET_ERR_INVALID_CIDR; + ip6_netmask(&data.ip, data.cidr); + + if (tb[IPSET_ATTR_PORT]) + data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PROTO]) { + data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + + if (data.proto == 0) + return -IPSET_ERR_INVALID_PROTO; + } else + return -IPSET_ERR_MISSING_PROTO; + + switch (data.proto) { + case IPPROTO_UDP: + case IPPROTO_TCP: + case IPPROTO_ICMPV6: + break; + default: + data.port = 0; + break; + } + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout(h->timeout)) + return -IPSET_ERR_TIMEOUT; + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + if (adt == IPSET_TEST || + !(data.proto == IPPROTO_TCP || data.proto == IPPROTO_UDP) || + !tb[IPSET_ATTR_PORT_TO]) { + ret = adtfn(set, &data, timeout); + return ip_set_eexist(ret, flags) ? 
0 : ret; + } + + port = ntohs(data.port); + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) + swap(port, port_to); + + for (; port <= port_to; port++) { + data.port = htons(port); + ret = adtfn(set, &data, timeout); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +/* Create hash:net,port type of sets */ + +static int +hash_netport_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +{ + struct ip_set_hash *h; + u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; + u8 hbits; + + if (!(set->family == AF_INET || set->family == AF_INET6)) + return -IPSET_ERR_INVALID_FAMILY; + + if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_HASHSIZE]) { + hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); + if (hashsize < IPSET_MIMINAL_HASHSIZE) + hashsize = IPSET_MIMINAL_HASHSIZE; + } + + if (tb[IPSET_ATTR_MAXELEM]) + maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); + + h = kzalloc(sizeof(*h) + + sizeof(struct ip_set_hash_nets) + * (set->family == AF_INET ? 32 : 128), GFP_KERNEL); + if (!h) + return -ENOMEM; + + h->maxelem = maxelem; + get_random_bytes(&h->initval, sizeof(h->initval)); + h->timeout = IPSET_NO_TIMEOUT; + + hbits = htable_bits(hashsize); + h->table = ip_set_alloc( + sizeof(struct htable) + + jhash_size(hbits) * sizeof(struct hbucket)); + if (!h->table) { + kfree(h); + return -ENOMEM; + } + h->table->htable_bits = hbits; + + set->data = h; + + if (tb[IPSET_ATTR_TIMEOUT]) { + h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + + set->variant = set->family == AF_INET + ? &hash_netport4_tvariant : &hash_netport6_tvariant; + + if (set->family == AF_INET) + hash_netport4_gc_init(set); + else + hash_netport6_gc_init(set); + } else { + set->variant = set->family == AF_INET + ?
&hash_netport4_variant : &hash_netport6_variant; + } + + pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", + set->name, jhash_size(h->table->htable_bits), + h->table->htable_bits, h->maxelem, set->data, h->table); + + return 0; +} + +static struct ip_set_type hash_netport_type __read_mostly = { + .name = "hash:net,port", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_PORT, + .dimension = IPSET_DIM_TWO, + .family = AF_UNSPEC, + .revision = 0, + .create = hash_netport_create, + .create_policy = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_PORT] = { .type = NLA_U16 }, + [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, + [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + }, + .me = THIS_MODULE, +}; + +static int __init +hash_netport_init(void) +{ + return ip_set_type_register(&hash_netport_type); +} + +static void __exit +hash_netport_fini(void) +{ + ip_set_type_unregister(&hash_netport_type); +} + +module_init(hash_netport_init); +module_exit(hash_netport_fini); -- cgit v1.1 From f830837f0eed0f9e371b8fd65169365780814bb1 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:54:59 +0100 Subject: netfilter: ipset: list:set set type support The module implements the list:set type support in two flavours: without and with timeout. The sets has two sides: for the userspace, they store the names of other (non list:set type of) sets: one can add, delete and test set names. 
For the kernel, it forms an ordered union of the member sets: the member sets are tried in order when elements are added, deleted and tested and the process stops at the first success. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/ipset/Kconfig | 10 + net/netfilter/ipset/Makefile | 3 + net/netfilter/ipset/ip_set_list_set.c | 584 ++++++++++++++++++++++++++++++++++ 3 files changed, 597 insertions(+) create mode 100644 net/netfilter/ipset/ip_set_list_set.c (limited to 'net') diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index 2512e7b..3b970d3 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -108,4 +108,14 @@ config IP_SET_HASH_NETPORT To compile it as a module, choose M here. If unsure, say N. +config IP_SET_LIST_SET + tristate "list:set set support" + depends on IP_SET + help + This option adds the list:set set type support. In this + kind of set one can store the name of other sets and it forms + an ordered union of the member sets. + + To compile it as a module, choose M here. If unsure, say N.
+ endif # IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index fbbebd6..5adbdab 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -19,3 +19,6 @@ obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o obj-$(CONFIG_IP_SET_HASH_NET) += ip_set_hash_net.o obj-$(CONFIG_IP_SET_HASH_NETPORT) += ip_set_hash_netport.o + +# list types +obj-$(CONFIG_IP_SET_LIST_SET) += ip_set_list_set.o diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c new file mode 100644 index 0000000..a47c329 --- /dev/null +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -0,0 +1,584 @@ +/* Copyright (C) 2008-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an IP set type: the list:set type */ + +#include +#include +#include +#include + +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("list:set type of IP sets"); +MODULE_ALIAS("ip_set_list:set"); + +/* Member elements without and with timeout */ +struct set_elem { + ip_set_id_t id; +}; + +struct set_telem { + ip_set_id_t id; + unsigned long timeout; +}; + +/* Type structure */ +struct list_set { + size_t dsize; /* element size */ + u32 size; /* size of set list array */ + u32 timeout; /* timeout value */ + struct timer_list gc; /* garbage collection */ + struct set_elem members[0]; /* the set members */ +}; + +static inline struct set_elem * +list_set_elem(const struct list_set *map, u32 id) +{ + return (struct set_elem *)((char *)map->members + id * map->dsize); +} + +static inline bool +list_set_timeout(const struct list_set *map, u32 id) +{ + const struct set_telem *elem = + (const struct set_telem *) list_set_elem(map, 
id); + + return ip_set_timeout_test(elem->timeout); +} + +static inline bool +list_set_expired(const struct list_set *map, u32 id) +{ + const struct set_telem *elem = + (const struct set_telem *) list_set_elem(map, id); + + return ip_set_timeout_expired(elem->timeout); +} + +static inline int +list_set_exist(const struct set_telem *elem) +{ + return elem->id != IPSET_INVALID_ID && + !ip_set_timeout_expired(elem->timeout); +} + +/* Set list without and with timeout */ + +static int +list_set_kadt(struct ip_set *set, const struct sk_buff *skb, + enum ipset_adt adt, u8 pf, u8 dim, u8 flags) +{ + struct list_set *map = set->data; + struct set_elem *elem; + u32 i; + int ret; + + for (i = 0; i < map->size; i++) { + elem = list_set_elem(map, i); + if (elem->id == IPSET_INVALID_ID) + return 0; + if (with_timeout(map->timeout) && list_set_expired(map, i)) + continue; + switch (adt) { + case IPSET_TEST: + ret = ip_set_test(elem->id, skb, pf, dim, flags); + if (ret > 0) + return ret; + break; + case IPSET_ADD: + ret = ip_set_add(elem->id, skb, pf, dim, flags); + if (ret == 0) + return ret; + break; + case IPSET_DEL: + ret = ip_set_del(elem->id, skb, pf, dim, flags); + if (ret == 0) + return ret; + break; + default: + break; + } + } + return -EINVAL; +} + +static bool +next_id_eq(const struct list_set *map, u32 i, ip_set_id_t id) +{ + const struct set_elem *elem; + + if (i + 1 < map->size) { + elem = list_set_elem(map, i + 1); + return !!(elem->id == id && + !(with_timeout(map->timeout) && + list_set_expired(map, i + 1))); + } + + return 0; +} + +static void +list_elem_add(struct list_set *map, u32 i, ip_set_id_t id) +{ + struct set_elem *e; + + for (; i < map->size; i++) { + e = list_set_elem(map, i); + swap(e->id, id); + if (e->id == IPSET_INVALID_ID) + break; + } +} + +static void +list_elem_tadd(struct list_set *map, u32 i, ip_set_id_t id, + unsigned long timeout) +{ + struct set_telem *e; + + for (; i < map->size; i++) { + e = (struct set_telem *)list_set_elem(map, i); + 
swap(e->id, id); + if (e->id == IPSET_INVALID_ID) + break; + swap(e->timeout, timeout); + } +} + +static int +list_set_add(struct list_set *map, u32 i, ip_set_id_t id, + unsigned long timeout) +{ + const struct set_elem *e = list_set_elem(map, i); + + if (i == map->size - 1 && e->id != IPSET_INVALID_ID) + /* Last element replaced: e.g. add new,before,last */ + ip_set_put_byindex(e->id); + if (with_timeout(map->timeout)) + list_elem_tadd(map, i, id, timeout); + else + list_elem_add(map, i, id); + + return 0; +} + +static int +list_set_del(struct list_set *map, ip_set_id_t id, u32 i) +{ + struct set_elem *a = list_set_elem(map, i), *b; + + ip_set_put_byindex(id); + + for (; i < map->size - 1; i++) { + b = list_set_elem(map, i + 1); + a->id = b->id; + if (with_timeout(map->timeout)) + ((struct set_telem *)a)->timeout = + ((struct set_telem *)b)->timeout; + a = b; + if (a->id == IPSET_INVALID_ID) + break; + } + /* Last element */ + a->id = IPSET_INVALID_ID; + return 0; +} + +static int +list_set_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags) +{ + struct list_set *map = set->data; + bool with_timeout = with_timeout(map->timeout); + int before = 0; + u32 timeout = map->timeout; + ip_set_id_t id, refid = IPSET_INVALID_ID; + const struct set_elem *elem; + struct ip_set *s; + u32 i; + int ret = 0; + + if (unlikely(!tb[IPSET_ATTR_NAME] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + id = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAME]), &s); + if (id == IPSET_INVALID_ID) + return -IPSET_ERR_NAME; + /* "Loop detection" */ + if (s->type->features & IPSET_TYPE_NAME) { + ret = -IPSET_ERR_LOOP; + goto finish; + } + + if (tb[IPSET_ATTR_CADT_FLAGS]) { + u32 f = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + before = f & IPSET_FLAG_BEFORE; + } + + if (before && 
!tb[IPSET_ATTR_NAMEREF]) { + ret = -IPSET_ERR_BEFORE; + goto finish; + } + + if (tb[IPSET_ATTR_NAMEREF]) { + refid = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAMEREF]), + &s); + if (refid == IPSET_INVALID_ID) { + ret = -IPSET_ERR_NAMEREF; + goto finish; + } + if (!before) + before = -1; + } + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!with_timeout) { + ret = -IPSET_ERR_TIMEOUT; + goto finish; + } + timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); + } + + switch (adt) { + case IPSET_TEST: + for (i = 0; i < map->size && !ret; i++) { + elem = list_set_elem(map, i); + if (elem->id == IPSET_INVALID_ID || + (before != 0 && i + 1 >= map->size)) + break; + else if (with_timeout && list_set_expired(map, i)) + continue; + else if (before > 0 && elem->id == id) + ret = next_id_eq(map, i, refid); + else if (before < 0 && elem->id == refid) + ret = next_id_eq(map, i, id); + else if (before == 0 && elem->id == id) + ret = 1; + } + break; + case IPSET_ADD: + for (i = 0; i < map->size && !ret; i++) { + elem = list_set_elem(map, i); + if (elem->id == id && + !(with_timeout && list_set_expired(map, i))) + ret = -IPSET_ERR_EXIST; + } + if (ret == -IPSET_ERR_EXIST) + break; + ret = -IPSET_ERR_LIST_FULL; + for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) { + elem = list_set_elem(map, i); + if (elem->id == IPSET_INVALID_ID) + ret = before != 0 ? -IPSET_ERR_REF_EXIST + : list_set_add(map, i, id, timeout); + else if (elem->id != refid) + continue; + else if (with_timeout && list_set_expired(map, i)) + ret = -IPSET_ERR_REF_EXIST; + else if (before) + ret = list_set_add(map, i, id, timeout); + else if (i + 1 < map->size) + ret = list_set_add(map, i + 1, id, timeout); + } + break; + case IPSET_DEL: + ret = -IPSET_ERR_EXIST; + for (i = 0; i < map->size && ret == -IPSET_ERR_EXIST; i++) { + elem = list_set_elem(map, i); + if (elem->id == IPSET_INVALID_ID) { + ret = before != 0 ? 
-IPSET_ERR_REF_EXIST + : -IPSET_ERR_EXIST; + break; + } else if (with_timeout && list_set_expired(map, i)) + continue; + else if (elem->id == id && + (before == 0 || + (before > 0 && + next_id_eq(map, i, refid)))) + ret = list_set_del(map, id, i); + else if (before < 0 && + elem->id == refid && + next_id_eq(map, i, id)) + ret = list_set_del(map, id, i + 1); + } + break; + default: + break; + } + +finish: + if (refid != IPSET_INVALID_ID) + ip_set_put_byindex(refid); + if (adt != IPSET_ADD || ret) + ip_set_put_byindex(id); + + return ip_set_eexist(ret, flags) ? 0 : ret; +} + +static void +list_set_flush(struct ip_set *set) +{ + struct list_set *map = set->data; + struct set_elem *elem; + u32 i; + + for (i = 0; i < map->size; i++) { + elem = list_set_elem(map, i); + if (elem->id != IPSET_INVALID_ID) { + ip_set_put_byindex(elem->id); + elem->id = IPSET_INVALID_ID; + } + } +} + +static void +list_set_destroy(struct ip_set *set) +{ + struct list_set *map = set->data; + + if (with_timeout(map->timeout)) + del_timer_sync(&map->gc); + list_set_flush(set); + kfree(map); + + set->data = NULL; +} + +static int +list_set_head(struct ip_set *set, struct sk_buff *skb) +{ + const struct list_set *map = set->data; + struct nlattr *nested; + + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) + goto nla_put_failure; + NLA_PUT_NET32(skb, IPSET_ATTR_SIZE, htonl(map->size)); + if (with_timeout(map->timeout)) + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)); + NLA_PUT_NET32(skb, IPSET_ATTR_REFERENCES, + htonl(atomic_read(&set->ref) - 1)); + NLA_PUT_NET32(skb, IPSET_ATTR_MEMSIZE, + htonl(sizeof(*map) + map->size * map->dsize)); + ipset_nest_end(skb, nested); + + return 0; +nla_put_failure: + return -EMSGSIZE; +} + +static int +list_set_list(const struct ip_set *set, + struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct list_set *map = set->data; + struct nlattr *atd, *nested; + u32 i, first = cb->args[2]; + const struct set_elem *e; + + atd = 
ipset_nest_start(skb, IPSET_ATTR_ADT); + if (!atd) + return -EMSGSIZE; + for (; cb->args[2] < map->size; cb->args[2]++) { + i = cb->args[2]; + e = list_set_elem(map, i); + if (e->id == IPSET_INVALID_ID) + goto finish; + if (with_timeout(map->timeout) && list_set_expired(map, i)) + continue; + nested = ipset_nest_start(skb, IPSET_ATTR_DATA); + if (!nested) { + if (i == first) { + nla_nest_cancel(skb, atd); + return -EMSGSIZE; + } else + goto nla_put_failure; + } + NLA_PUT_STRING(skb, IPSET_ATTR_NAME, + ip_set_name_byindex(e->id)); + if (with_timeout(map->timeout)) { + const struct set_telem *te = + (const struct set_telem *) e; + NLA_PUT_NET32(skb, IPSET_ATTR_TIMEOUT, + htonl(ip_set_timeout_get(te->timeout))); + } + ipset_nest_end(skb, nested); + } +finish: + ipset_nest_end(skb, atd); + /* Set listing finished */ + cb->args[2] = 0; + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nested); + ipset_nest_end(skb, atd); + if (unlikely(i == first)) { + cb->args[2] = 0; + return -EMSGSIZE; + } + return 0; +} + +static bool +list_set_same_set(const struct ip_set *a, const struct ip_set *b) +{ + const struct list_set *x = a->data; + const struct list_set *y = b->data; + + return x->size == y->size && + x->timeout == y->timeout; +} + +static const struct ip_set_type_variant list_set = { + .kadt = list_set_kadt, + .uadt = list_set_uadt, + .destroy = list_set_destroy, + .flush = list_set_flush, + .head = list_set_head, + .list = list_set_list, + .same_set = list_set_same_set, +}; + +static void +list_set_gc(unsigned long ul_set) +{ + struct ip_set *set = (struct ip_set *) ul_set; + struct list_set *map = set->data; + struct set_telem *e; + u32 i; + + /* We run parallel with other readers (test element) + * but adding/deleting new entries is locked out */ + read_lock_bh(&set->lock); + for (i = map->size - 1; i >= 0; i--) { + e = (struct set_telem *) list_set_elem(map, i); + if (e->id != IPSET_INVALID_ID && + list_set_expired(map, i)) + list_set_del(map, e->id, i); + } + 
read_unlock_bh(&set->lock); + + map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + add_timer(&map->gc); +} + +static void +list_set_gc_init(struct ip_set *set) +{ + struct list_set *map = set->data; + + init_timer(&map->gc); + map->gc.data = (unsigned long) set; + map->gc.function = list_set_gc; + map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + add_timer(&map->gc); +} + +/* Create list:set type of sets */ + +static bool +init_list_set(struct ip_set *set, u32 size, size_t dsize, + unsigned long timeout) +{ + struct list_set *map; + struct set_elem *e; + u32 i; + + map = kzalloc(sizeof(*map) + size * dsize, GFP_KERNEL); + if (!map) + return false; + + map->size = size; + map->dsize = dsize; + map->timeout = timeout; + set->data = map; + + for (i = 0; i < size; i++) { + e = list_set_elem(map, i); + e->id = IPSET_INVALID_ID; + } + + return true; +} + +static int +list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +{ + u32 size = IP_SET_LIST_DEFAULT_SIZE; + + if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_SIZE) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_SIZE]) + size = ip_set_get_h32(tb[IPSET_ATTR_SIZE]); + if (size < IP_SET_LIST_MIN_SIZE) + size = IP_SET_LIST_MIN_SIZE; + + if (tb[IPSET_ATTR_TIMEOUT]) { + if (!init_list_set(set, size, sizeof(struct set_telem), + ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]))) + return -ENOMEM; + + list_set_gc_init(set); + } else { + if (!init_list_set(set, size, sizeof(struct set_elem), + IPSET_NO_TIMEOUT)) + return -ENOMEM; + } + set->variant = &list_set; + return 0; +} + +static struct ip_set_type list_set_type __read_mostly = { + .name = "list:set", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_NAME | IPSET_DUMP_LAST, + .dimension = IPSET_DIM_ONE, + .family = AF_UNSPEC, + .revision = 0, + .create = list_set_create, + .create_policy = { + [IPSET_ATTR_SIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_TIMEOUT] = { 
.type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_NAME] = { .type = NLA_STRING, + .len = IPSET_MAXNAMELEN }, + [IPSET_ATTR_NAMEREF] = { .type = NLA_STRING, + .len = IPSET_MAXNAMELEN }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + }, + .me = THIS_MODULE, +}; + +static int __init +list_set_init(void) +{ + return ip_set_type_register(&list_set_type); +} + +static void __exit +list_set_fini(void) +{ + ip_set_type_unregister(&list_set_type); +} + +module_init(list_set_init); +module_exit(list_set_fini); -- cgit v1.1 From d956798d82d2d331c031301965d69e17a1a48a2b Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 1 Feb 2011 15:56:00 +0100 Subject: netfilter: xtables: "set" match and "SET" target support The patch adds the combined module of the "SET" target and "set" match to netfilter. Both the previous and the current revisions are supported. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/Kconfig | 12 ++ net/netfilter/Makefile | 1 + net/netfilter/xt_set.c | 359 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 372 insertions(+) create mode 100644 net/netfilter/xt_set.c (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 351abf8..06fa9e4 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -352,6 +352,18 @@ config NETFILTER_XT_CONNMARK ctmark), similarly to the packet mark (nfmark). Using this target and match, you can set and match on this mark. +config NETFILTER_XT_SET + tristate 'set target and match support' + depends on IP_SET + depends on NETFILTER_ADVANCED + help + This option adds the "SET" target and "set" match. + + Using this target and match, you can add/delete and match + elements in the sets created by ipset(8). + + To compile it as a module, choose M here. If unsure, say N. 
+ # alphabetically ordered list of targets comment "Xtables targets" diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 510b586..1148643 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -46,6 +46,7 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o # combos obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o +obj-$(CONFIG_NETFILTER_XT_SET) += xt_set.o # targets obj-$(CONFIG_NETFILTER_XT_TARGET_AUDIT) += xt_AUDIT.o diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c new file mode 100644 index 0000000..061d48c --- /dev/null +++ b/net/netfilter/xt_set.c @@ -0,0 +1,359 @@ +/* Copyright (C) 2000-2002 Joakim Axelsson + * Patrick Schaaf + * Martin Josefsson + * Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module which implements the set match and SET target + * for netfilter/iptables. 
*/ + +#include +#include +#include + +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("Xtables: IP set match and target module"); +MODULE_ALIAS("xt_SET"); +MODULE_ALIAS("ipt_set"); +MODULE_ALIAS("ip6t_set"); +MODULE_ALIAS("ipt_SET"); +MODULE_ALIAS("ip6t_SET"); + +static inline int +match_set(ip_set_id_t index, const struct sk_buff *skb, + u8 pf, u8 dim, u8 flags, int inv) +{ + if (ip_set_test(index, skb, pf, dim, flags)) + inv = !inv; + return inv; +} + +/* Revision 0 interface: backward compatible with netfilter/iptables */ + +static bool +set_match_v0(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_set_info_match_v0 *info = par->matchinfo; + + return match_set(info->match_set.index, skb, par->family, + info->match_set.u.compat.dim, + info->match_set.u.compat.flags, + info->match_set.u.compat.flags & IPSET_INV_MATCH); +} + +static void +compat_flags(struct xt_set_info_v0 *info) +{ + u_int8_t i; + + /* Fill out compatibility data according to enum ip_set_kopt */ + info->u.compat.dim = IPSET_DIM_ZERO; + if (info->u.flags[0] & IPSET_MATCH_INV) + info->u.compat.flags |= IPSET_INV_MATCH; + for (i = 0; i < IPSET_DIM_MAX-1 && info->u.flags[i]; i++) { + info->u.compat.dim++; + if (info->u.flags[i] & IPSET_SRC) + info->u.compat.flags |= (1<u.compat.dim); + } +} + +static int +set_match_v0_checkentry(const struct xt_mtchk_param *par) +{ + struct xt_set_info_match_v0 *info = par->matchinfo; + ip_set_id_t index; + + index = ip_set_nfnl_get_byindex(info->match_set.index); + + if (index == IPSET_INVALID_ID) { + pr_warning("Cannot find set indentified by id %u to match\n", + info->match_set.index); + return -ENOENT; + } + if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) { + pr_warning("Protocol error: set match dimension " + "is over the limit!\n"); + return -ERANGE; + } + + /* Fill out compatibility data */ + compat_flags(&info->match_set); + + return 0; +} + +static void 
+set_match_v0_destroy(const struct xt_mtdtor_param *par) +{ + struct xt_set_info_match_v0 *info = par->matchinfo; + + ip_set_nfnl_put(info->match_set.index); +} + +static unsigned int +set_target_v0(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_set_info_target_v0 *info = par->targinfo; + + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_add(info->add_set.index, skb, par->family, + info->add_set.u.compat.dim, + info->add_set.u.compat.flags); + if (info->del_set.index != IPSET_INVALID_ID) + ip_set_del(info->del_set.index, skb, par->family, + info->del_set.u.compat.dim, + info->del_set.u.compat.flags); + + return XT_CONTINUE; +} + +static int +set_target_v0_checkentry(const struct xt_tgchk_param *par) +{ + struct xt_set_info_target_v0 *info = par->targinfo; + ip_set_id_t index; + + if (info->add_set.index != IPSET_INVALID_ID) { + index = ip_set_nfnl_get_byindex(info->add_set.index); + if (index == IPSET_INVALID_ID) { + pr_warning("Cannot find add_set index %u as target\n", + info->add_set.index); + return -ENOENT; + } + } + + if (info->del_set.index != IPSET_INVALID_ID) { + index = ip_set_nfnl_get_byindex(info->del_set.index); + if (index == IPSET_INVALID_ID) { + pr_warning("Cannot find del_set index %u as target\n", + info->del_set.index); + return -ENOENT; + } + } + if (info->add_set.u.flags[IPSET_DIM_MAX-1] != 0 || + info->del_set.u.flags[IPSET_DIM_MAX-1] != 0) { + pr_warning("Protocol error: SET target dimension " + "is over the limit!\n"); + return -ERANGE; + } + + /* Fill out compatibility data */ + compat_flags(&info->add_set); + compat_flags(&info->del_set); + + return 0; +} + +static void +set_target_v0_destroy(const struct xt_tgdtor_param *par) +{ + const struct xt_set_info_target_v0 *info = par->targinfo; + + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(info->add_set.index); + if (info->del_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(info->del_set.index); +} + +/* Revision 1: current interface to 
netfilter/iptables */ + +static bool +set_match(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_set_info_match *info = par->matchinfo; + + return match_set(info->match_set.index, skb, par->family, + info->match_set.dim, + info->match_set.flags, + info->match_set.flags & IPSET_INV_MATCH); +} + +static int +set_match_checkentry(const struct xt_mtchk_param *par) +{ + struct xt_set_info_match *info = par->matchinfo; + ip_set_id_t index; + + index = ip_set_nfnl_get_byindex(info->match_set.index); + + if (index == IPSET_INVALID_ID) { + pr_warning("Cannot find set indentified by id %u to match\n", + info->match_set.index); + return -ENOENT; + } + if (info->match_set.dim > IPSET_DIM_MAX) { + pr_warning("Protocol error: set match dimension " + "is over the limit!\n"); + return -ERANGE; + } + + return 0; +} + +static void +set_match_destroy(const struct xt_mtdtor_param *par) +{ + struct xt_set_info_match *info = par->matchinfo; + + ip_set_nfnl_put(info->match_set.index); +} + +static unsigned int +set_target(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_set_info_target *info = par->targinfo; + + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_add(info->add_set.index, + skb, par->family, + info->add_set.dim, + info->add_set.flags); + if (info->del_set.index != IPSET_INVALID_ID) + ip_set_del(info->del_set.index, + skb, par->family, + info->add_set.dim, + info->del_set.flags); + + return XT_CONTINUE; +} + +static int +set_target_checkentry(const struct xt_tgchk_param *par) +{ + const struct xt_set_info_target *info = par->targinfo; + ip_set_id_t index; + + if (info->add_set.index != IPSET_INVALID_ID) { + index = ip_set_nfnl_get_byindex(info->add_set.index); + if (index == IPSET_INVALID_ID) { + pr_warning("Cannot find add_set index %u as target\n", + info->add_set.index); + return -ENOENT; + } + } + + if (info->del_set.index != IPSET_INVALID_ID) { + index = ip_set_nfnl_get_byindex(info->del_set.index); + if (index 
== IPSET_INVALID_ID) { + pr_warning("Cannot find del_set index %u as target\n", + info->del_set.index); + return -ENOENT; + } + } + if (info->add_set.dim > IPSET_DIM_MAX || + info->del_set.flags > IPSET_DIM_MAX) { + pr_warning("Protocol error: SET target dimension " + "is over the limit!\n"); + return -ERANGE; + } + + return 0; +} + +static void +set_target_destroy(const struct xt_tgdtor_param *par) +{ + const struct xt_set_info_target *info = par->targinfo; + + if (info->add_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(info->add_set.index); + if (info->del_set.index != IPSET_INVALID_ID) + ip_set_nfnl_put(info->del_set.index); +} + +static struct xt_match set_matches[] __read_mostly = { + { + .name = "set", + .family = NFPROTO_IPV4, + .revision = 0, + .match = set_match_v0, + .matchsize = sizeof(struct xt_set_info_match_v0), + .checkentry = set_match_v0_checkentry, + .destroy = set_match_v0_destroy, + .me = THIS_MODULE + }, + { + .name = "set", + .family = NFPROTO_IPV4, + .revision = 1, + .match = set_match, + .matchsize = sizeof(struct xt_set_info_match), + .checkentry = set_match_checkentry, + .destroy = set_match_destroy, + .me = THIS_MODULE + }, + { + .name = "set", + .family = NFPROTO_IPV6, + .revision = 1, + .match = set_match, + .matchsize = sizeof(struct xt_set_info_match), + .checkentry = set_match_checkentry, + .destroy = set_match_destroy, + .me = THIS_MODULE + }, +}; + +static struct xt_target set_targets[] __read_mostly = { + { + .name = "SET", + .revision = 0, + .family = NFPROTO_IPV4, + .target = set_target_v0, + .targetsize = sizeof(struct xt_set_info_target_v0), + .checkentry = set_target_v0_checkentry, + .destroy = set_target_v0_destroy, + .me = THIS_MODULE + }, + { + .name = "SET", + .revision = 1, + .family = NFPROTO_IPV4, + .target = set_target, + .targetsize = sizeof(struct xt_set_info_target), + .checkentry = set_target_checkentry, + .destroy = set_target_destroy, + .me = THIS_MODULE + }, + { + .name = "SET", + .revision = 1, + .family = 
NFPROTO_IPV6, + .target = set_target, + .targetsize = sizeof(struct xt_set_info_target), + .checkentry = set_target_checkentry, + .destroy = set_target_destroy, + .me = THIS_MODULE + }, +}; + +static int __init xt_set_init(void) +{ + int ret = xt_register_matches(set_matches, ARRAY_SIZE(set_matches)); + + if (!ret) { + ret = xt_register_targets(set_targets, + ARRAY_SIZE(set_targets)); + if (ret) + xt_unregister_matches(set_matches, + ARRAY_SIZE(set_matches)); + } + return ret; +} + +static void __exit xt_set_fini(void) +{ + xt_unregister_matches(set_matches, ARRAY_SIZE(set_matches)); + xt_unregister_targets(set_targets, ARRAY_SIZE(set_targets)); +} + +module_init(xt_set_init); +module_exit(xt_set_fini); -- cgit v1.1 From 8da560ced56c423cd6d35803cd0244c944c676bd Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 1 Feb 2011 16:27:25 +0100 Subject: netfilter: ipset: use nla_parse_nested() Replace calls of the form: nla_parse(tb, ATTR_MAX, nla_data(attr), nla_len(attr), policy) by: nla_parse_nested(tb, ATTR_MAX, attr, policy) Signed-off-by: Patrick McHardy --- net/netfilter/ipset/ip_set_core.c | 42 +++++++++++++++------------------------ 1 file changed, 16 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 8a73624..ae0f8b5 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -246,8 +246,7 @@ ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr) if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; - if (nla_parse(tb, IPSET_ATTR_IPADDR_MAX, nla_data(nla), nla_len(nla), - ipaddr_policy)) + if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy)) return -IPSET_ERR_PROTOCOL; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4))) return -IPSET_ERR_PROTOCOL; @@ -265,8 +264,7 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; - if 
(nla_parse(tb, IPSET_ATTR_IPADDR_MAX, nla_data(nla), nla_len(nla), - ipaddr_policy)) + if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy)) return -IPSET_ERR_PROTOCOL; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6))) return -IPSET_ERR_PROTOCOL; @@ -666,10 +664,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, * Without holding any locks, create private part. */ if (attr[IPSET_ATTR_DATA] && - nla_parse(tb, IPSET_ATTR_CREATE_MAX, - nla_data(attr[IPSET_ATTR_DATA]), - nla_len(attr[IPSET_ATTR_DATA]), - set->type->create_policy)) { + nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA], + set->type->create_policy)) { ret = -IPSET_ERR_PROTOCOL; goto put_out; } @@ -1169,10 +1165,9 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, use_lineno = !!attr[IPSET_ATTR_LINENO]; if (attr[IPSET_ATTR_DATA]) { - if (nla_parse(tb, IPSET_ATTR_ADT_MAX, - nla_data(attr[IPSET_ATTR_DATA]), - nla_len(attr[IPSET_ATTR_DATA]), - set->type->adt_policy)) + if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, + attr[IPSET_ATTR_DATA], + set->type->adt_policy)) return -IPSET_ERR_PROTOCOL; ret = call_ad(skb, set, tb, IPSET_ADD, flags, use_lineno); } else { @@ -1182,9 +1177,8 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, memset(tb, 0, sizeof(tb)); if (nla_type(nla) != IPSET_ATTR_DATA || !flag_nested(nla) || - nla_parse(tb, IPSET_ATTR_ADT_MAX, - nla_data(nla), nla_len(nla), - set->type->adt_policy)) + nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla, + set->type->adt_policy)) return -IPSET_ERR_PROTOCOL; ret = call_ad(skb, set, tb, IPSET_ADD, flags, use_lineno); @@ -1224,10 +1218,9 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb, use_lineno = !!attr[IPSET_ATTR_LINENO]; if (attr[IPSET_ATTR_DATA]) { - if (nla_parse(tb, IPSET_ATTR_ADT_MAX, - nla_data(attr[IPSET_ATTR_DATA]), - nla_len(attr[IPSET_ATTR_DATA]), - set->type->adt_policy)) + if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, + attr[IPSET_ATTR_DATA], + set->type->adt_policy)) return 
-IPSET_ERR_PROTOCOL; ret = call_ad(skb, set, tb, IPSET_DEL, flags, use_lineno); } else { @@ -1237,9 +1230,8 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb, memset(tb, 0, sizeof(*tb)); if (nla_type(nla) != IPSET_ATTR_DATA || !flag_nested(nla) || - nla_parse(tb, IPSET_ATTR_ADT_MAX, - nla_data(nla), nla_len(nla), - set->type->adt_policy)) + nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla, + set->type->adt_policy)) return -IPSET_ERR_PROTOCOL; ret = call_ad(skb, set, tb, IPSET_DEL, flags, use_lineno); @@ -1269,10 +1261,8 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, if (set == NULL) return -ENOENT; - if (nla_parse(tb, IPSET_ATTR_ADT_MAX, - nla_data(attr[IPSET_ATTR_DATA]), - nla_len(attr[IPSET_ATTR_DATA]), - set->type->adt_policy)) + if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], + set->type->adt_policy)) return -IPSET_ERR_PROTOCOL; read_lock_bh(&set->lock); -- cgit v1.1 From 582e1fc85ca3727abd4e99109a267c514ea5c260 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 1 Feb 2011 16:57:37 +0100 Subject: netfilter: ipset: remove unnecessary includes None of the set types need uaccess.h since this is handled centrally in ip_set_core. Most set types additionally don't need bitops.h and spinlock.h since they use neither. tcp.h is only needed by those using before(), udp.h is not needed at all. 
Signed-off-by: Patrick McHardy --- net/netfilter/ipset/ip_set_bitmap_ip.c | 1 - net/netfilter/ipset/ip_set_bitmap_ipmac.c | 3 --- net/netfilter/ipset/ip_set_bitmap_port.c | 5 ----- net/netfilter/ipset/ip_set_hash_ip.c | 3 --- net/netfilter/ipset/ip_set_hash_ipport.c | 3 --- net/netfilter/ipset/ip_set_hash_ipportip.c | 3 --- net/netfilter/ipset/ip_set_hash_ipportnet.c | 3 --- net/netfilter/ipset/ip_set_hash_net.c | 3 --- net/netfilter/ipset/ip_set_hash_netport.c | 3 --- 9 files changed, 27 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 0474400..bca9699 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index d826332..5e79017 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -15,9 +15,6 @@ #include #include #include -#include -#include -#include #include #include #include diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 92074bb..165f09b 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -9,13 +9,8 @@ #include #include -#include -#include #include #include -#include -#include -#include #include #include #include diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 53964bc..43bcce2 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -12,9 +12,6 @@ #include #include #include -#include -#include -#include #include #include #include diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index d9b1928..adbe787 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ 
b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -12,9 +12,6 @@ #include #include #include -#include -#include -#include #include #include #include diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 80dae9d..22e23ab 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -12,9 +12,6 @@ #include #include #include -#include -#include -#include #include #include #include diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 8eacd8a..6033e8b 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -12,9 +12,6 @@ #include #include #include -#include -#include -#include #include #include #include diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index fb0e6a6..c4db202 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -12,9 +12,6 @@ #include #include #include -#include -#include -#include #include #include #include diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 342250f..34a1656 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -12,9 +12,6 @@ #include #include #include -#include -#include -#include #include #include #include -- cgit v1.1 From a00f1f3686d6a062b5295c092a9dff059adbdbf5 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 1 Feb 2011 17:26:37 +0100 Subject: netfilter: ctnetlink: fix ctnetlink_parse_tuple() warning net/netfilter/nf_conntrack_netlink.c: In function 'ctnetlink_parse_tuple': net/netfilter/nf_conntrack_netlink.c:832:11: warning: comparison between 'enum ctattr_tuple' and 'enum ctattr_type' Use ctattr_type for the 'type' parameter since that's the type of all attributes passed to this function. 
Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 61c7394..cb1a819 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -803,7 +803,7 @@ static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = { static int ctnetlink_parse_tuple(const struct nlattr * const cda[], struct nf_conntrack_tuple *tuple, - enum ctattr_tuple type, u_int8_t l3num) + enum ctattr_type type, u_int8_t l3num) { struct nlattr *tb[CTA_TUPLE_MAX+1]; int err; -- cgit v1.1 From a870c8c5cbe41bcf42cf4fa9f43d969b5134090b Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 1 Feb 2011 18:21:53 +0100 Subject: IPVS: use z modifier for sizeof() argument Reported-by: Randy Dunlap Signed-off-by: Simon Horman Acked-by: Randy Dunlap Signed-off-by: Hans Schillstrom Tested-by: Hans Schillstrom Signed-off-by: Patrick McHardy --- net/netfilter/ipvs/ip_vs_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index d889f4f..4d06617 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1887,7 +1887,7 @@ static int __net_init __ip_vs_init(struct net *net) ipvs->gen = atomic_read(&ipvs_netns_cnt); atomic_inc(&ipvs_netns_cnt); net->ipvs = ipvs; - printk(KERN_INFO "IPVS: Creating netns size=%lu id=%d\n", + printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n", sizeof(struct netns_ipvs), ipvs->gen); return 0; } -- cgit v1.1 From 258e958b85cef23b1598515504426e8d0576d223 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 1 Feb 2011 18:24:09 +0100 Subject: IPVS: remove duplicate initialisation of rs_table Signed-off-by: Simon Horman Acked-by: Randy Dunlap Signed-off-by: Hans Schillstrom Tested-by: Hans Schillstrom Signed-off-by: Patrick McHardy
--- net/netfilter/ipvs/ip_vs_ctl.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 98df59a..d7c2fa8 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3515,9 +3515,6 @@ int __net_init __ip_vs_control_init(struct net *net) } spin_lock_init(&ipvs->tot_stats->lock); - for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) - INIT_LIST_HEAD(&ipvs->rs_table[idx]); - proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops); proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); proc_net_fops_create(net, "ip_vs_stats_percpu", 0, -- cgit v1.1 From 0443929ff0ecc4d1e690edaffa338cabe0863d3b Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 1 Feb 2011 18:29:04 +0100 Subject: IPVS: Allow compilation with CONFIG_SYSCTL disabled This is a rather naive approach to allowing IPVS to compile with CONFIG_SYSCTL disabled. I am working on a more comprehensive patch which will remove compilation of all sysctl-related IPVS code when CONFIG_SYSCTL is disabled.
Reported-by: Randy Dunlap Signed-off-by: Simon Horman Acked-by: Randy Dunlap Signed-off-by: Hans Schillstrom Tested-by: Hans Schillstrom Signed-off-by: Patrick McHardy --- net/netfilter/ipvs/ip_vs_ctl.c | 14 +++++++++----- net/netfilter/ipvs/ip_vs_lblc.c | 20 ++++++++++---------- net/netfilter/ipvs/ip_vs_lblcr.c | 20 ++++++++++---------- 3 files changed, 29 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index d7c2fa8..c73b0c8 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3552,10 +3552,15 @@ int __net_init __ip_vs_control_init(struct net *net) tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; +#ifdef CONFIG_SYSCTL ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path, tbl); - if (ipvs->sysctl_hdr == NULL) - goto err_reg; + if (ipvs->sysctl_hdr == NULL) { + if (!net_eq(net, &init_net)) + kfree(tbl); + goto err_dup; + } +#endif ip_vs_new_estimator(net, ipvs->tot_stats); ipvs->sysctl_tbl = tbl; /* Schedule defense work */ @@ -3563,9 +3568,6 @@ int __net_init __ip_vs_control_init(struct net *net) schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); return 0; -err_reg: - if (!net_eq(net, &init_net)) - kfree(tbl); err_dup: free_percpu(ipvs->cpustats); err_alloc: @@ -3581,7 +3583,9 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net) ip_vs_kill_estimator(net, ipvs->tot_stats); cancel_delayed_work_sync(&ipvs->defense_work); cancel_work_sync(&ipvs->defense_work.work); +#ifdef CONFIG_SYSCTL unregister_net_sysctl_table(ipvs->sysctl_hdr); +#endif proc_net_remove(net, "ip_vs_stats_percpu"); proc_net_remove(net, "ip_vs_stats"); proc_net_remove(net, "ip_vs"); diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index d5bec33..00b5ffa 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -554,33 +554,33 @@ static int __net_init __ip_vs_lblc_init(struct net *net) 
sizeof(vs_vars_table), GFP_KERNEL); if (ipvs->lblc_ctl_table == NULL) - goto err_dup; + return -ENOMEM; } else ipvs->lblc_ctl_table = vs_vars_table; ipvs->sysctl_lblc_expiration = 24*60*60*HZ; ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration; +#ifdef CONFIG_SYSCTL ipvs->lblc_ctl_header = register_net_sysctl_table(net, net_vs_ctl_path, ipvs->lblc_ctl_table); - if (!ipvs->lblc_ctl_header) - goto err_reg; + if (!ipvs->lblc_ctl_header) { + if (!net_eq(net, &init_net)) + kfree(ipvs->lblc_ctl_table); + return -ENOMEM; + } +#endif return 0; - -err_reg: - if (!net_eq(net, &init_net)) - kfree(ipvs->lblc_ctl_table); - -err_dup: - return -ENOMEM; } static void __net_exit __ip_vs_lblc_exit(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); +#ifdef CONFIG_SYSCTL unregister_net_sysctl_table(ipvs->lblc_ctl_header); +#endif if (!net_eq(net, &init_net)) kfree(ipvs->lblc_ctl_table); diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 61ae8cf..bfa25f1 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -754,33 +754,33 @@ static int __net_init __ip_vs_lblcr_init(struct net *net) sizeof(vs_vars_table), GFP_KERNEL); if (ipvs->lblcr_ctl_table == NULL) - goto err_dup; + return -ENOMEM; } else ipvs->lblcr_ctl_table = vs_vars_table; ipvs->sysctl_lblcr_expiration = 24*60*60*HZ; ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration; +#ifdef CONFIG_SYSCTL ipvs->lblcr_ctl_header = register_net_sysctl_table(net, net_vs_ctl_path, ipvs->lblcr_ctl_table); - if (!ipvs->lblcr_ctl_header) - goto err_reg; + if (!ipvs->lblcr_ctl_header) { + if (!net_eq(net, &init_net)) + kfree(ipvs->lblcr_ctl_table); + return -ENOMEM; + } +#endif return 0; - -err_reg: - if (!net_eq(net, &init_net)) - kfree(ipvs->lblcr_ctl_table); - -err_dup: - return -ENOMEM; } static void __net_exit __ip_vs_lblcr_exit(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); +#ifdef CONFIG_SYSCTL 
unregister_net_sysctl_table(ipvs->lblcr_ctl_header); +#endif if (!net_eq(net, &init_net)) kfree(ipvs->lblcr_ctl_table); -- cgit v1.1 From ed3d1e7b72069a3463b7e227b18cae4a09b0ddad Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 1 Feb 2011 18:30:26 +0100 Subject: IPVS: Remove ip_vs_sync_cleanup from section __exit ip_vs_sync_cleanup() may be called from ip_vs_init() on error and thus needs to be accessible from section __init Reported-by: Randy Dunlap Signed-off-by: Simon Horman Acked-by: Randy Dunlap Signed-off-by: Hans Schillstrom Tested-by: Hans Schillstrom Signed-off-by: Patrick McHardy --- net/netfilter/ipvs/ip_vs_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index d5a6e64..2a2a836 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1686,7 +1686,7 @@ int __init ip_vs_sync_init(void) return register_pernet_subsys(&ipvs_sync_ops); } -void __exit ip_vs_sync_cleanup(void) +void ip_vs_sync_cleanup(void) { unregister_pernet_subsys(&ipvs_sync_ops); } -- cgit v1.1 From 3630b7c050d9c3564f143d595339fc06b888d6f3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 1 Feb 2011 15:15:39 -0800 Subject: ipv4: Remove fib_hash. The time has finally come to remove the hash based routing table implementation in ipv4. FIB Trie is mature, well tested, and I've done an audit of its code to confirm that it implements insert, delete, and lookup with the same identical semantics as fib_hash did. If there are any semantic differences found in fib_trie, we should simply fix them. I've placed the trie statistic config option under advanced router configuration. Signed-off-by: David S.
Miller Acked-by: Stephen Hemminger --- net/ipv4/Kconfig | 38 +- net/ipv4/Makefile | 4 +- net/ipv4/fib_hash.c | 1061 --------------------------------------------------- 3 files changed, 2 insertions(+), 1101 deletions(-) delete mode 100644 net/ipv4/fib_hash.c (limited to 'net') diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 8949a05..cbb505b 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -55,45 +55,9 @@ config IP_ADVANCED_ROUTER If unsure, say N here. -choice - prompt "Choose IP: FIB lookup algorithm (choose FIB_HASH if unsure)" - depends on IP_ADVANCED_ROUTER - default ASK_IP_FIB_HASH - -config ASK_IP_FIB_HASH - bool "FIB_HASH" - ---help--- - Current FIB is very proven and good enough for most users. - -config IP_FIB_TRIE - bool "FIB_TRIE" - ---help--- - Use new experimental LC-trie as FIB lookup algorithm. - This improves lookup performance if you have a large - number of routes. - - LC-trie is a longest matching prefix lookup algorithm which - performs better than FIB_HASH for large routing tables. - But, it consumes more memory and is more complex. - - LC-trie is described in: - - IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson - IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, - June 1999 - - An experimental study of compression methods for dynamic tries - Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. - - -endchoice - -config IP_FIB_HASH - def_bool ASK_IP_FIB_HASH || !IP_ADVANCED_ROUTER - config IP_FIB_TRIE_STATS bool "FIB TRIE statistics" - depends on IP_FIB_TRIE + depends on IP_ADVANCED_ROUTER ---help--- Keep track of statistics on structure of FIB TRIE table. Useful for testing and measuring TRIE performance. 
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 4978d22..0dc772d 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -10,12 +10,10 @@ obj-y := route.o inetpeer.o protocol.o \ tcp_minisocks.o tcp_cong.o \ datagram.o raw.o udp.o udplite.o \ arp.o icmp.o devinet.o af_inet.o igmp.o \ - fib_frontend.o fib_semantics.o \ + fib_frontend.o fib_semantics.o fib_trie.o \ inet_fragment.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o -obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o -obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o obj-$(CONFIG_IP_MROUTE) += ipmr.o diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c deleted file mode 100644 index fadb602..0000000 --- a/net/ipv4/fib_hash.c +++ /dev/null @@ -1,1061 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * IPv4 FIB: lookup engine and maintenance routines. - * - * Authors: Alexey Kuznetsov, - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "fib_lookup.h" - -static struct kmem_cache *fn_hash_kmem __read_mostly; -static struct kmem_cache *fn_alias_kmem __read_mostly; - -struct fib_node { - struct hlist_node fn_hash; - struct list_head fn_alias; - __be32 fn_key; - struct fib_alias fn_embedded_alias; -}; - -#define EMBEDDED_HASH_SIZE (L1_CACHE_BYTES / sizeof(struct hlist_head)) - -struct fn_zone { - struct fn_zone __rcu *fz_next; /* Next not empty zone */ - struct hlist_head __rcu *fz_hash; /* Hash table pointer */ - seqlock_t fz_lock; - u32 fz_hashmask; /* (fz_divisor - 1) */ - - u8 fz_order; /* Zone order (0..32) */ - u8 fz_revorder; /* 32 - fz_order */ - __be32 fz_mask; /* inet_make_mask(order) */ -#define FZ_MASK(fz) ((fz)->fz_mask) - - struct hlist_head fz_embedded_hash[EMBEDDED_HASH_SIZE]; - - int fz_nent; /* Number of entries */ - int fz_divisor; /* Hash size (mask+1) */ -}; - -struct fn_hash { - struct fn_zone *fn_zones[33]; - struct fn_zone __rcu *fn_zone_list; -}; - -static inline u32 fn_hash(__be32 key, struct fn_zone *fz) -{ - u32 h = ntohl(key) >> fz->fz_revorder; - h ^= (h>>20); - h ^= (h>>10); - h ^= (h>>5); - h &= fz->fz_hashmask; - return h; -} - -static inline __be32 fz_key(__be32 dst, struct fn_zone *fz) -{ - return dst & FZ_MASK(fz); -} - -static unsigned int fib_hash_genid; - -#define FZ_MAX_DIVISOR ((PAGE_SIZE<fn_hash); - - new_head = rcu_dereference_protected(fz->fz_hash, 1) + - fn_hash(f->fn_key, fz); - hlist_add_head_rcu(&f->fn_hash, new_head); - } - } -} - -static void fz_hash_free(struct hlist_head *hash, int divisor) -{ - unsigned long size = divisor * sizeof(struct hlist_head); - - if (size <= PAGE_SIZE) - kfree(hash); - else - free_pages((unsigned long)hash, get_order(size)); -} - -static 
void fn_rehash_zone(struct fn_zone *fz) -{ - struct hlist_head *ht, *old_ht; - int old_divisor, new_divisor; - u32 new_hashmask; - - new_divisor = old_divisor = fz->fz_divisor; - - switch (old_divisor) { - case EMBEDDED_HASH_SIZE: - new_divisor *= EMBEDDED_HASH_SIZE; - break; - case EMBEDDED_HASH_SIZE*EMBEDDED_HASH_SIZE: - new_divisor *= (EMBEDDED_HASH_SIZE/2); - break; - default: - if ((old_divisor << 1) > FZ_MAX_DIVISOR) { - printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor); - return; - } - new_divisor = (old_divisor << 1); - break; - } - - new_hashmask = (new_divisor - 1); - -#if RT_CACHE_DEBUG >= 2 - printk(KERN_DEBUG "fn_rehash_zone: hash for zone %d grows from %d\n", - fz->fz_order, old_divisor); -#endif - - ht = fz_hash_alloc(new_divisor); - - if (ht) { - struct fn_zone nfz; - - memcpy(&nfz, fz, sizeof(nfz)); - - write_seqlock_bh(&fz->fz_lock); - old_ht = rcu_dereference_protected(fz->fz_hash, 1); - RCU_INIT_POINTER(nfz.fz_hash, ht); - nfz.fz_hashmask = new_hashmask; - nfz.fz_divisor = new_divisor; - fn_rebuild_zone(&nfz, old_ht, old_divisor); - fib_hash_genid++; - rcu_assign_pointer(fz->fz_hash, ht); - fz->fz_hashmask = new_hashmask; - fz->fz_divisor = new_divisor; - write_sequnlock_bh(&fz->fz_lock); - - if (old_ht != fz->fz_embedded_hash) { - synchronize_rcu(); - fz_hash_free(old_ht, old_divisor); - } - } -} - -static void fn_free_node_rcu(struct rcu_head *head) -{ - struct fib_node *f = container_of(head, struct fib_node, fn_embedded_alias.rcu); - - kmem_cache_free(fn_hash_kmem, f); -} - -static inline void fn_free_node(struct fib_node *f) -{ - call_rcu(&f->fn_embedded_alias.rcu, fn_free_node_rcu); -} - -static void fn_free_alias_rcu(struct rcu_head *head) -{ - struct fib_alias *fa = container_of(head, struct fib_alias, rcu); - - kmem_cache_free(fn_alias_kmem, fa); -} - -static inline void fn_free_alias(struct fib_alias *fa, struct fib_node *f) -{ - fib_release_info(fa->fa_info); - if (fa == &f->fn_embedded_alias) - fa->fa_info = NULL; - else - 
call_rcu(&fa->rcu, fn_free_alias_rcu); -} - -static struct fn_zone * -fn_new_zone(struct fn_hash *table, int z) -{ - int i; - struct fn_zone *fz = kzalloc(sizeof(struct fn_zone), GFP_KERNEL); - if (!fz) - return NULL; - - seqlock_init(&fz->fz_lock); - fz->fz_divisor = z ? EMBEDDED_HASH_SIZE : 1; - fz->fz_hashmask = fz->fz_divisor - 1; - RCU_INIT_POINTER(fz->fz_hash, fz->fz_embedded_hash); - fz->fz_order = z; - fz->fz_revorder = 32 - z; - fz->fz_mask = inet_make_mask(z); - - /* Find the first not empty zone with more specific mask */ - for (i = z + 1; i <= 32; i++) - if (table->fn_zones[i]) - break; - if (i > 32) { - /* No more specific masks, we are the first. */ - rcu_assign_pointer(fz->fz_next, - rtnl_dereference(table->fn_zone_list)); - rcu_assign_pointer(table->fn_zone_list, fz); - } else { - rcu_assign_pointer(fz->fz_next, - rtnl_dereference(table->fn_zones[i]->fz_next)); - rcu_assign_pointer(table->fn_zones[i]->fz_next, fz); - } - table->fn_zones[z] = fz; - fib_hash_genid++; - return fz; -} - -int fib_table_lookup(struct fib_table *tb, - const struct flowi *flp, struct fib_result *res, - int fib_flags) -{ - int err; - struct fn_zone *fz; - struct fn_hash *t = (struct fn_hash *)tb->tb_data; - - rcu_read_lock(); - for (fz = rcu_dereference(t->fn_zone_list); - fz != NULL; - fz = rcu_dereference(fz->fz_next)) { - struct hlist_head *head; - struct hlist_node *node; - struct fib_node *f; - __be32 k; - unsigned int seq; - - do { - seq = read_seqbegin(&fz->fz_lock); - k = fz_key(flp->fl4_dst, fz); - - head = rcu_dereference(fz->fz_hash) + fn_hash(k, fz); - hlist_for_each_entry_rcu(f, node, head, fn_hash) { - if (f->fn_key != k) - continue; - - err = fib_semantic_match(tb, &f->fn_alias, - flp, res, - fz->fz_order, fib_flags); - if (err <= 0) - goto out; - } - } while (read_seqretry(&fz->fz_lock, seq)); - } - err = 1; -out: - rcu_read_unlock(); - return err; -} - -/* Insert node F to FZ. 
*/ -static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f) -{ - struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(f->fn_key, fz); - - hlist_add_head_rcu(&f->fn_hash, head); -} - -/* Return the node in FZ matching KEY. */ -static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key) -{ - struct hlist_head *head = rtnl_dereference(fz->fz_hash) + fn_hash(key, fz); - struct hlist_node *node; - struct fib_node *f; - - hlist_for_each_entry_rcu(f, node, head, fn_hash) { - if (f->fn_key == key) - return f; - } - - return NULL; -} - - -static struct fib_alias *fib_fast_alloc(struct fib_node *f) -{ - struct fib_alias *fa = &f->fn_embedded_alias; - - if (fa->fa_info != NULL) - fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); - return fa; -} - -/* Caller must hold RTNL. */ -int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) -{ - struct fn_hash *table = (struct fn_hash *) tb->tb_data; - struct fib_node *new_f = NULL; - struct fib_node *f; - struct fib_alias *fa, *new_fa; - struct fn_zone *fz; - struct fib_info *fi; - u8 tos = cfg->fc_tos; - __be32 key; - int err; - - if (cfg->fc_dst_len > 32) - return -EINVAL; - - fz = table->fn_zones[cfg->fc_dst_len]; - if (!fz && !(fz = fn_new_zone(table, cfg->fc_dst_len))) - return -ENOBUFS; - - key = 0; - if (cfg->fc_dst) { - if (cfg->fc_dst & ~FZ_MASK(fz)) - return -EINVAL; - key = fz_key(cfg->fc_dst, fz); - } - - fi = fib_create_info(cfg); - if (IS_ERR(fi)) - return PTR_ERR(fi); - - if (fz->fz_nent > (fz->fz_divisor<<1) && - fz->fz_divisor < FZ_MAX_DIVISOR && - (cfg->fc_dst_len == 32 || - (1 << cfg->fc_dst_len) > fz->fz_divisor)) - fn_rehash_zone(fz); - - f = fib_find_node(fz, key); - - if (!f) - fa = NULL; - else - fa = fib_find_alias(&f->fn_alias, tos, fi->fib_priority); - - /* Now fa, if non-NULL, points to the first fib alias - * with the same keys [prefix,tos,priority], if such key already - * exists or to the node before which we will insert new one. 
- * - * If fa is NULL, we will need to allocate a new one and - * insert to the head of f. - * - * If f is NULL, no fib node matched the destination key - * and we need to allocate a new one of those as well. - */ - - if (fa && fa->fa_tos == tos && - fa->fa_info->fib_priority == fi->fib_priority) { - struct fib_alias *fa_first, *fa_match; - - err = -EEXIST; - if (cfg->fc_nlflags & NLM_F_EXCL) - goto out; - - /* We have 2 goals: - * 1. Find exact match for type, scope, fib_info to avoid - * duplicate routes - * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it - */ - fa_match = NULL; - fa_first = fa; - fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); - list_for_each_entry_continue(fa, &f->fn_alias, fa_list) { - if (fa->fa_tos != tos) - break; - if (fa->fa_info->fib_priority != fi->fib_priority) - break; - if (fa->fa_type == cfg->fc_type && - fa->fa_scope == cfg->fc_scope && - fa->fa_info == fi) { - fa_match = fa; - break; - } - } - - if (cfg->fc_nlflags & NLM_F_REPLACE) { - u8 state; - - fa = fa_first; - if (fa_match) { - if (fa == fa_match) - err = 0; - goto out; - } - err = -ENOBUFS; - new_fa = fib_fast_alloc(f); - if (new_fa == NULL) - goto out; - - new_fa->fa_tos = fa->fa_tos; - new_fa->fa_info = fi; - new_fa->fa_type = cfg->fc_type; - new_fa->fa_scope = cfg->fc_scope; - state = fa->fa_state; - new_fa->fa_state = state & ~FA_S_ACCESSED; - fib_hash_genid++; - list_replace_rcu(&fa->fa_list, &new_fa->fa_list); - - fn_free_alias(fa, f); - if (state & FA_S_ACCESSED) - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); - rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, - tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); - return 0; - } - - /* Error if we find a perfect match which - * uses the same scope, type, and nexthop - * information. 
- */ - if (fa_match) - goto out; - - if (!(cfg->fc_nlflags & NLM_F_APPEND)) - fa = fa_first; - } - - err = -ENOENT; - if (!(cfg->fc_nlflags & NLM_F_CREATE)) - goto out; - - err = -ENOBUFS; - - if (!f) { - new_f = kmem_cache_zalloc(fn_hash_kmem, GFP_KERNEL); - if (new_f == NULL) - goto out; - - INIT_HLIST_NODE(&new_f->fn_hash); - INIT_LIST_HEAD(&new_f->fn_alias); - new_f->fn_key = key; - f = new_f; - } - - new_fa = fib_fast_alloc(f); - if (new_fa == NULL) - goto out; - - new_fa->fa_info = fi; - new_fa->fa_tos = tos; - new_fa->fa_type = cfg->fc_type; - new_fa->fa_scope = cfg->fc_scope; - new_fa->fa_state = 0; - - /* - * Insert new entry to the list. - */ - - if (new_f) - fib_insert_node(fz, new_f); - list_add_tail_rcu(&new_fa->fa_list, - (fa ? &fa->fa_list : &f->fn_alias)); - fib_hash_genid++; - - if (new_f) - fz->fz_nent++; - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); - - rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id, - &cfg->fc_nlinfo, 0); - return 0; - -out: - if (new_f) - kmem_cache_free(fn_hash_kmem, new_f); - fib_release_info(fi); - return err; -} - -int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) -{ - struct fn_hash *table = (struct fn_hash *)tb->tb_data; - struct fib_node *f; - struct fib_alias *fa, *fa_to_delete; - struct fn_zone *fz; - __be32 key; - - if (cfg->fc_dst_len > 32) - return -EINVAL; - - if ((fz = table->fn_zones[cfg->fc_dst_len]) == NULL) - return -ESRCH; - - key = 0; - if (cfg->fc_dst) { - if (cfg->fc_dst & ~FZ_MASK(fz)) - return -EINVAL; - key = fz_key(cfg->fc_dst, fz); - } - - f = fib_find_node(fz, key); - - if (!f) - fa = NULL; - else - fa = fib_find_alias(&f->fn_alias, cfg->fc_tos, 0); - if (!fa) - return -ESRCH; - - fa_to_delete = NULL; - fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); - list_for_each_entry_continue(fa, &f->fn_alias, fa_list) { - struct fib_info *fi = fa->fa_info; - - if (fa->fa_tos != cfg->fc_tos) - break; - - if ((!cfg->fc_type || - fa->fa_type == cfg->fc_type) && - 
(cfg->fc_scope == RT_SCOPE_NOWHERE || - fa->fa_scope == cfg->fc_scope) && - (!cfg->fc_protocol || - fi->fib_protocol == cfg->fc_protocol) && - fib_nh_match(cfg, fi) == 0) { - fa_to_delete = fa; - break; - } - } - - if (fa_to_delete) { - int kill_fn; - - fa = fa_to_delete; - rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len, - tb->tb_id, &cfg->fc_nlinfo, 0); - - kill_fn = 0; - list_del_rcu(&fa->fa_list); - if (list_empty(&f->fn_alias)) { - hlist_del_rcu(&f->fn_hash); - kill_fn = 1; - } - fib_hash_genid++; - - if (fa->fa_state & FA_S_ACCESSED) - rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); - fn_free_alias(fa, f); - if (kill_fn) { - fn_free_node(f); - fz->fz_nent--; - } - - return 0; - } - return -ESRCH; -} - -static int fn_flush_list(struct fn_zone *fz, int idx) -{ - struct hlist_head *head = rtnl_dereference(fz->fz_hash) + idx; - struct hlist_node *node, *n; - struct fib_node *f; - int found = 0; - - hlist_for_each_entry_safe(f, node, n, head, fn_hash) { - struct fib_alias *fa, *fa_node; - int kill_f; - - kill_f = 0; - list_for_each_entry_safe(fa, fa_node, &f->fn_alias, fa_list) { - struct fib_info *fi = fa->fa_info; - - if (fi && (fi->fib_flags&RTNH_F_DEAD)) { - list_del_rcu(&fa->fa_list); - if (list_empty(&f->fn_alias)) { - hlist_del_rcu(&f->fn_hash); - kill_f = 1; - } - fib_hash_genid++; - - fn_free_alias(fa, f); - found++; - } - } - if (kill_f) { - fn_free_node(f); - fz->fz_nent--; - } - } - return found; -} - -/* caller must hold RTNL. 
*/ -int fib_table_flush(struct fib_table *tb) -{ - struct fn_hash *table = (struct fn_hash *) tb->tb_data; - struct fn_zone *fz; - int found = 0; - - for (fz = rtnl_dereference(table->fn_zone_list); - fz != NULL; - fz = rtnl_dereference(fz->fz_next)) { - int i; - - for (i = fz->fz_divisor - 1; i >= 0; i--) - found += fn_flush_list(fz, i); - } - return found; -} - -void fib_free_table(struct fib_table *tb) -{ - struct fn_hash *table = (struct fn_hash *) tb->tb_data; - struct fn_zone *fz, *next; - - next = table->fn_zone_list; - while (next != NULL) { - fz = next; - next = fz->fz_next; - - if (fz->fz_hash != fz->fz_embedded_hash) - fz_hash_free(fz->fz_hash, fz->fz_divisor); - - kfree(fz); - } - - kfree(tb); -} - -static inline int -fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, - struct fib_table *tb, - struct fn_zone *fz, - struct hlist_head *head) -{ - struct hlist_node *node; - struct fib_node *f; - int i, s_i; - - s_i = cb->args[4]; - i = 0; - hlist_for_each_entry_rcu(f, node, head, fn_hash) { - struct fib_alias *fa; - - list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) { - if (i < s_i) - goto next; - - if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, - RTM_NEWROUTE, - tb->tb_id, - fa->fa_type, - fa->fa_scope, - f->fn_key, - fz->fz_order, - fa->fa_tos, - fa->fa_info, - NLM_F_MULTI) < 0) { - cb->args[4] = i; - return -1; - } -next: - i++; - } - } - cb->args[4] = i; - return skb->len; -} - -static inline int -fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb, - struct fib_table *tb, - struct fn_zone *fz) -{ - int h, s_h; - struct hlist_head *head = rcu_dereference(fz->fz_hash); - - if (head == NULL) - return skb->len; - s_h = cb->args[3]; - for (h = s_h; h < fz->fz_divisor; h++) { - if (hlist_empty(head + h)) - continue; - if (fn_hash_dump_bucket(skb, cb, tb, fz, head + h) < 0) { - cb->args[3] = h; - return -1; - } - memset(&cb->args[4], 0, - sizeof(cb->args) - 4*sizeof(cb->args[0])); - } - cb->args[3] = 
h; - return skb->len; -} - -int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, - struct netlink_callback *cb) -{ - int m = 0, s_m; - struct fn_zone *fz; - struct fn_hash *table = (struct fn_hash *)tb->tb_data; - - s_m = cb->args[2]; - rcu_read_lock(); - for (fz = rcu_dereference(table->fn_zone_list); - fz != NULL; - fz = rcu_dereference(fz->fz_next), m++) { - if (m < s_m) - continue; - if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) { - cb->args[2] = m; - rcu_read_unlock(); - return -1; - } - memset(&cb->args[3], 0, - sizeof(cb->args) - 3*sizeof(cb->args[0])); - } - rcu_read_unlock(); - cb->args[2] = m; - return skb->len; -} - -void __init fib_hash_init(void) -{ - fn_hash_kmem = kmem_cache_create("ip_fib_hash", sizeof(struct fib_node), - 0, SLAB_PANIC, NULL); - - fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias), - 0, SLAB_PANIC, NULL); - -} - -struct fib_table *fib_hash_table(u32 id) -{ - struct fib_table *tb; - - tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash), - GFP_KERNEL); - if (tb == NULL) - return NULL; - - tb->tb_id = id; - tb->tb_default = -1; - - memset(tb->tb_data, 0, sizeof(struct fn_hash)); - return tb; -} - -/* ------------------------------------------------------------------------ */ -#ifdef CONFIG_PROC_FS - -struct fib_iter_state { - struct seq_net_private p; - struct fn_zone *zone; - int bucket; - struct hlist_head *hash_head; - struct fib_node *fn; - struct fib_alias *fa; - loff_t pos; - unsigned int genid; - int valid; -}; - -static struct fib_alias *fib_get_first(struct seq_file *seq) -{ - struct fib_iter_state *iter = seq->private; - struct fib_table *main_table; - struct fn_hash *table; - - main_table = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN); - table = (struct fn_hash *)main_table->tb_data; - - iter->bucket = 0; - iter->hash_head = NULL; - iter->fn = NULL; - iter->fa = NULL; - iter->pos = 0; - iter->genid = fib_hash_genid; - iter->valid = 1; - - for (iter->zone = 
rcu_dereference(table->fn_zone_list); - iter->zone != NULL; - iter->zone = rcu_dereference(iter->zone->fz_next)) { - int maxslot; - - if (!iter->zone->fz_nent) - continue; - - iter->hash_head = rcu_dereference(iter->zone->fz_hash); - maxslot = iter->zone->fz_divisor; - - for (iter->bucket = 0; iter->bucket < maxslot; - ++iter->bucket, ++iter->hash_head) { - struct hlist_node *node; - struct fib_node *fn; - - hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) { - struct fib_alias *fa; - - list_for_each_entry(fa, &fn->fn_alias, fa_list) { - iter->fn = fn; - iter->fa = fa; - goto out; - } - } - } - } -out: - return iter->fa; -} - -static struct fib_alias *fib_get_next(struct seq_file *seq) -{ - struct fib_iter_state *iter = seq->private; - struct fib_node *fn; - struct fib_alias *fa; - - /* Advance FA, if any. */ - fn = iter->fn; - fa = iter->fa; - if (fa) { - BUG_ON(!fn); - list_for_each_entry_continue(fa, &fn->fn_alias, fa_list) { - iter->fa = fa; - goto out; - } - } - - fa = iter->fa = NULL; - - /* Advance FN. */ - if (fn) { - struct hlist_node *node = &fn->fn_hash; - hlist_for_each_entry_continue(fn, node, fn_hash) { - iter->fn = fn; - - list_for_each_entry(fa, &fn->fn_alias, fa_list) { - iter->fa = fa; - goto out; - } - } - } - - fn = iter->fn = NULL; - - /* Advance hash chain. 
*/ - if (!iter->zone) - goto out; - - for (;;) { - struct hlist_node *node; - int maxslot; - - maxslot = iter->zone->fz_divisor; - - while (++iter->bucket < maxslot) { - iter->hash_head++; - - hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) { - list_for_each_entry(fa, &fn->fn_alias, fa_list) { - iter->fn = fn; - iter->fa = fa; - goto out; - } - } - } - - iter->zone = rcu_dereference(iter->zone->fz_next); - - if (!iter->zone) - goto out; - - iter->bucket = 0; - iter->hash_head = rcu_dereference(iter->zone->fz_hash); - - hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) { - list_for_each_entry(fa, &fn->fn_alias, fa_list) { - iter->fn = fn; - iter->fa = fa; - goto out; - } - } - } -out: - iter->pos++; - return fa; -} - -static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos) -{ - struct fib_iter_state *iter = seq->private; - struct fib_alias *fa; - - if (iter->valid && pos >= iter->pos && iter->genid == fib_hash_genid) { - fa = iter->fa; - pos -= iter->pos; - } else - fa = fib_get_first(seq); - - if (fa) - while (pos && (fa = fib_get_next(seq))) - --pos; - return pos ? NULL : fa; -} - -static void *fib_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) -{ - void *v = NULL; - - rcu_read_lock(); - if (fib_get_table(seq_file_net(seq), RT_TABLE_MAIN)) - v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; - return v; -} - -static void *fib_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - return v == SEQ_START_TOKEN ? 
fib_get_first(seq) : fib_get_next(seq); -} - -static void fib_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) -{ - rcu_read_unlock(); -} - -static unsigned fib_flag_trans(int type, __be32 mask, struct fib_info *fi) -{ - static const unsigned type2flags[RTN_MAX + 1] = { - [7] = RTF_REJECT, - [8] = RTF_REJECT, - }; - unsigned flags = type2flags[type]; - - if (fi && fi->fib_nh->nh_gw) - flags |= RTF_GATEWAY; - if (mask == htonl(0xFFFFFFFF)) - flags |= RTF_HOST; - flags |= RTF_UP; - return flags; -} - -/* - * This outputs /proc/net/route. - * - * It always works in backward compatibility mode. - * The format of the file is not supposed to be changed. - */ -static int fib_seq_show(struct seq_file *seq, void *v) -{ - struct fib_iter_state *iter; - int len; - __be32 prefix, mask; - unsigned flags; - struct fib_node *f; - struct fib_alias *fa; - struct fib_info *fi; - - if (v == SEQ_START_TOKEN) { - seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway " - "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU" - "\tWindow\tIRTT"); - goto out; - } - - iter = seq->private; - f = iter->fn; - fa = iter->fa; - fi = fa->fa_info; - prefix = f->fn_key; - mask = FZ_MASK(iter->zone); - flags = fib_flag_trans(fa->fa_type, mask, fi); - if (fi) - seq_printf(seq, - "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u%n", - fi->fib_dev ? fi->fib_dev->name : "*", prefix, - fi->fib_nh->nh_gw, flags, 0, 0, fi->fib_priority, - mask, (fi->fib_advmss ? 
fi->fib_advmss + 40 : 0), - fi->fib_window, - fi->fib_rtt >> 3, &len); - else - seq_printf(seq, - "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u%n", - prefix, 0, flags, 0, 0, 0, mask, 0, 0, 0, &len); - - seq_printf(seq, "%*s\n", 127 - len, ""); -out: - return 0; -} - -static const struct seq_operations fib_seq_ops = { - .start = fib_seq_start, - .next = fib_seq_next, - .stop = fib_seq_stop, - .show = fib_seq_show, -}; - -static int fib_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &fib_seq_ops, - sizeof(struct fib_iter_state)); -} - -static const struct file_operations fib_seq_fops = { - .owner = THIS_MODULE, - .open = fib_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -int __net_init fib_proc_init(struct net *net) -{ - if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_seq_fops)) - return -ENOMEM; - return 0; -} - -void __net_exit fib_proc_exit(struct net *net) -{ - proc_net_remove(net, "route"); -} -#endif /* CONFIG_PROC_FS */ -- cgit v1.1 From 5348ba85a02ffe80a8af33a524b6610966760d3d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 1 Feb 2011 15:30:56 -0800 Subject: ipv4: Update some fib_hash centric interface names. fib_hash_init() --> fib_trie_init() fib_hash_table() --> fib_trie_table() Signed-off-by: David S. 
Miller --- net/ipv4/fib_frontend.c | 8 ++++---- net/ipv4/fib_trie.c | 5 ++--- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 930768b..2a49c06 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -51,11 +51,11 @@ static int __net_init fib4_rules_init(struct net *net) { struct fib_table *local_table, *main_table; - local_table = fib_hash_table(RT_TABLE_LOCAL); + local_table = fib_trie_table(RT_TABLE_LOCAL); if (local_table == NULL) return -ENOMEM; - main_table = fib_hash_table(RT_TABLE_MAIN); + main_table = fib_trie_table(RT_TABLE_MAIN); if (main_table == NULL) goto fail; @@ -82,7 +82,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id) if (tb) return tb; - tb = fib_hash_table(id); + tb = fib_trie_table(id); if (!tb) return NULL; h = id & (FIB_TABLE_HASHSZ - 1); @@ -1086,5 +1086,5 @@ void __init ip_fib_init(void) register_netdevice_notifier(&fib_netdev_notifier); register_inetaddr_notifier(&fib_inetaddr_notifier); - fib_hash_init(); + fib_trie_init(); } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 16d589c..73cb984 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1916,7 +1916,7 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, return skb->len; } -void __init fib_hash_init(void) +void __init fib_trie_init(void) { fn_alias_kmem = kmem_cache_create("ip_fib_alias", sizeof(struct fib_alias), @@ -1929,8 +1929,7 @@ void __init fib_hash_init(void) } -/* Fix more generic FIB names for init later */ -struct fib_table *fib_hash_table(u32 id) +struct fib_table *fib_trie_table(u32 id) { struct fib_table *tb; struct trie *t; -- cgit v1.1 From 123b9731b14f49cd41c91ed2b6c31e515615347c Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Tue, 1 Feb 2011 15:34:21 -0800 Subject: ipv4: Rename fib_hash_* locals in fib_semantics.c To avoid confusion with the recently deleted fib_hash.c code, use "fib_info_hash_*" instead of plain "fib_hash_*". Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index b15857d..146bd82 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -49,7 +49,7 @@ static DEFINE_SPINLOCK(fib_info_lock); static struct hlist_head *fib_info_hash; static struct hlist_head *fib_info_laddrhash; -static unsigned int fib_hash_size; +static unsigned int fib_info_hash_size; static unsigned int fib_info_cnt; #define DEVINDEX_HASHBITS 8 @@ -223,7 +223,7 @@ static inline unsigned int fib_devindex_hashfn(unsigned int val) static inline unsigned int fib_info_hashfn(const struct fib_info *fi) { - unsigned int mask = (fib_hash_size - 1); + unsigned int mask = (fib_info_hash_size - 1); unsigned int val = fi->fib_nhs; val ^= fi->fib_protocol; @@ -615,14 +615,14 @@ out: static inline unsigned int fib_laddr_hashfn(__be32 val) { - unsigned int mask = (fib_hash_size - 1); + unsigned int mask = (fib_info_hash_size - 1); return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask; } -static struct hlist_head *fib_hash_alloc(int bytes) +static struct hlist_head *fib_info_hash_alloc(int bytes) { if (bytes <= PAGE_SIZE) return kzalloc(bytes, GFP_KERNEL); @@ -632,7 +632,7 @@ static struct hlist_head *fib_hash_alloc(int bytes) get_order(bytes)); } -static void fib_hash_free(struct hlist_head *hash, int bytes) +static void fib_info_hash_free(struct hlist_head *hash, int bytes) { if (!hash) return; @@ -643,18 +643,18 @@ static void fib_hash_free(struct hlist_head *hash, int bytes) free_pages((unsigned long) hash, get_order(bytes)); } -static void fib_hash_move(struct 
hlist_head *new_info_hash, - struct hlist_head *new_laddrhash, - unsigned int new_size) +static void fib_info_hash_move(struct hlist_head *new_info_hash, + struct hlist_head *new_laddrhash, + unsigned int new_size) { struct hlist_head *old_info_hash, *old_laddrhash; - unsigned int old_size = fib_hash_size; + unsigned int old_size = fib_info_hash_size; unsigned int i, bytes; spin_lock_bh(&fib_info_lock); old_info_hash = fib_info_hash; old_laddrhash = fib_info_laddrhash; - fib_hash_size = new_size; + fib_info_hash_size = new_size; for (i = 0; i < old_size; i++) { struct hlist_head *head = &fib_info_hash[i]; @@ -695,8 +695,8 @@ static void fib_hash_move(struct hlist_head *new_info_hash, spin_unlock_bh(&fib_info_lock); bytes = old_size * sizeof(struct hlist_head *); - fib_hash_free(old_info_hash, bytes); - fib_hash_free(old_laddrhash, bytes); + fib_info_hash_free(old_info_hash, bytes); + fib_info_hash_free(old_laddrhash, bytes); } struct fib_info *fib_create_info(struct fib_config *cfg) @@ -720,8 +720,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg) #endif err = -ENOBUFS; - if (fib_info_cnt >= fib_hash_size) { - unsigned int new_size = fib_hash_size << 1; + if (fib_info_cnt >= fib_info_hash_size) { + unsigned int new_size = fib_info_hash_size << 1; struct hlist_head *new_info_hash; struct hlist_head *new_laddrhash; unsigned int bytes; @@ -729,15 +729,15 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (!new_size) new_size = 1; bytes = new_size * sizeof(struct hlist_head *); - new_info_hash = fib_hash_alloc(bytes); - new_laddrhash = fib_hash_alloc(bytes); + new_info_hash = fib_info_hash_alloc(bytes); + new_laddrhash = fib_info_hash_alloc(bytes); if (!new_info_hash || !new_laddrhash) { - fib_hash_free(new_info_hash, bytes); - fib_hash_free(new_laddrhash, bytes); + fib_info_hash_free(new_info_hash, bytes); + fib_info_hash_free(new_laddrhash, bytes); } else - fib_hash_move(new_info_hash, new_laddrhash, new_size); + 
fib_info_hash_move(new_info_hash, new_laddrhash, new_size); - if (!fib_hash_size) + if (!fib_info_hash_size) goto failure; } -- cgit v1.1 From 316ed388802533bcfd3dffb38d2ba29ac5428456 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 2 Feb 2011 09:31:37 +0100 Subject: netfilter: ipset: add missing break statemtns in ip_set_get_ip_port() Don't fall through in the switch statement, otherwise IPv4 headers are incorrectly parsed again as IPv6 and the return value will always be 'false'. Signed-off-by: Patrick McHardy --- net/netfilter/ipset/ip_set_getport.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c index 76737bb..4dd2785 100644 --- a/net/netfilter/ipset/ip_set_getport.c +++ b/net/netfilter/ipset/ip_set_getport.c @@ -118,8 +118,10 @@ ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port) switch (pf) { case AF_INET: ret = ip_set_get_ip4_port(skb, src, port, &proto); + break; case AF_INET6: ret = ip_set_get_ip6_port(skb, src, port, &proto); + break; default: return false; } -- cgit v1.1 From 724bab476bcac9f7d0b5204cb06e346216d42166 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 2 Feb 2011 23:50:01 +0100 Subject: netfilter: ipset: fix linking with CONFIG_IPV6=n Add a dummy ip_set_get_ip6_port function that unconditionally returns false for CONFIG_IPV6=n and convert the real function to ipv6_skip_exthdr() to avoid pulling in the ip6_tables module when loading ipset. 
Signed-off-by: Patrick McHardy --- net/netfilter/ipset/ip_set_getport.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c index 4dd2785..8d52272 100644 --- a/net/netfilter/ipset/ip_set_getport.c +++ b/net/netfilter/ipset/ip_set_getport.c @@ -13,6 +13,7 @@ #include #include #include +#include #include @@ -93,21 +94,23 @@ ip_set_get_ip4_port(const struct sk_buff *skb, bool src, } EXPORT_SYMBOL_GPL(ip_set_get_ip4_port); +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) bool ip_set_get_ip6_port(const struct sk_buff *skb, bool src, __be16 *port, u8 *proto) { - unsigned int protooff = 0; - int protocol; - unsigned short fragoff; + int protoff; + u8 nexthdr; - protocol = ipv6_find_hdr(skb, &protooff, -1, &fragoff); - if (protocol <= 0 || fragoff) + nexthdr = ipv6_hdr(skb)->nexthdr; + protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr); + if (protoff < 0) return false; - return get_port(skb, protocol, protooff, src, port, proto); + return get_port(skb, nexthdr, protoff, src, port, proto); } EXPORT_SYMBOL_GPL(ip_set_get_ip6_port); +#endif bool ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port) -- cgit v1.1 From 5f52bc3cdd1bb2e12e61639df19d9dcd530c4568 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 2 Feb 2011 23:56:00 +0100 Subject: netfilter: ipset: send error message manually When a message carries multiple commands and one of them triggers an error, we have to report to the userspace which one was that. The line number of the command plays this role and there's an attribute reserved in the header part of the message to be filled out with the error line number. In order not to modify the original message received from the userspace, we construct a new, complete netlink error message and modifies the attribute there, then send it. 
Netlink is notified not to send its ACK/error message. Signed-off-by: Jozsef Kadlecsik --- net/netfilter/ipset/ip_set_core.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index ae0f8b5..8b1a54c 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1098,7 +1098,7 @@ static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = { }; static int -call_ad(struct sk_buff *skb, struct ip_set *set, +call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 flags, bool use_lineno) { @@ -1118,12 +1118,25 @@ call_ad(struct sk_buff *skb, struct ip_set *set, return 0; if (lineno && use_lineno) { /* Error in restore/batch mode: send back lineno */ - struct nlmsghdr *nlh = nlmsg_hdr(skb); + struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb); + struct sk_buff *skb2; + struct nlmsgerr *errmsg; + size_t payload = sizeof(*errmsg) + nlmsg_len(nlh); int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; - struct nlattr *cmdattr = (void *)nlh + min_len; + struct nlattr *cmdattr; u32 *errline; + skb2 = nlmsg_new(payload, GFP_KERNEL); + if (skb2 == NULL) + return -ENOMEM; + rep = __nlmsg_put(skb2, NETLINK_CB(skb).pid, + nlh->nlmsg_seq, NLMSG_ERROR, payload, 0); + errmsg = nlmsg_data(rep); + errmsg->error = ret; + memcpy(&errmsg->msg, nlh, nlh->nlmsg_len); + cmdattr = (void *)&errmsg->msg + min_len; + nla_parse(cda, IPSET_ATTR_CMD_MAX, cmdattr, nlh->nlmsg_len - min_len, ip_set_adt_policy); @@ -1131,6 +1144,10 @@ call_ad(struct sk_buff *skb, struct ip_set *set, errline = nla_data(cda[IPSET_ATTR_LINENO]); *errline = lineno; + + netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + /* Signal netlink not to send its ACK/errmsg. 
*/ + return -EINTR; } return ret; @@ -1169,7 +1186,8 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_DATA], set->type->adt_policy)) return -IPSET_ERR_PROTOCOL; - ret = call_ad(skb, set, tb, IPSET_ADD, flags, use_lineno); + ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags, + use_lineno); } else { int nla_rem; @@ -1180,7 +1198,7 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla, set->type->adt_policy)) return -IPSET_ERR_PROTOCOL; - ret = call_ad(skb, set, tb, IPSET_ADD, + ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags, use_lineno); if (ret < 0) return ret; @@ -1222,7 +1240,8 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_DATA], set->type->adt_policy)) return -IPSET_ERR_PROTOCOL; - ret = call_ad(skb, set, tb, IPSET_DEL, flags, use_lineno); + ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags, + use_lineno); } else { int nla_rem; @@ -1233,7 +1252,7 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb, nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla, set->type->adt_policy)) return -IPSET_ERR_PROTOCOL; - ret = call_ad(skb, set, tb, IPSET_DEL, + ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags, use_lineno); if (ret < 0) return ret; -- cgit v1.1 From 9291747f118d6404e509747b85ff5f6dfec368d2 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 3 Feb 2011 00:05:43 +0100 Subject: netfilter: xtables: add device group match Add a new 'devgroup' match to match on the device group of the incoming and outgoing network device of a packet. 
Signed-off-by: Patrick McHardy --- net/netfilter/Kconfig | 9 +++++ net/netfilter/Makefile | 1 + net/netfilter/xt_devgroup.c | 82 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+) create mode 100644 net/netfilter/xt_devgroup.c (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 06fa9e4..82a6e0d 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -738,6 +738,15 @@ config NETFILTER_XT_MATCH_DCCP If you want to compile it as a module, say M here and read . If unsure, say `N'. +config NETFILTER_XT_MATCH_DEVGROUP + tristate '"devgroup" match support' + depends on NETFILTER_ADVANCED + help + This options adds a `devgroup' match, which allows to match on the + device group a network device is assigned to. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_DSCP tristate '"dscp" and "tos" match support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 1148643..d57a890 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -77,6 +77,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o +obj-$(CONFIG_NETFILTER_XT_MATCH_DEVGROUP) += xt_devgroup.o obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o diff --git a/net/netfilter/xt_devgroup.c b/net/netfilter/xt_devgroup.c new file mode 100644 index 0000000..d9202cd --- /dev/null +++ b/net/netfilter/xt_devgroup.c @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Patrick McHardy "); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Xtables: Device group match"); +MODULE_ALIAS("ipt_devgroup"); +MODULE_ALIAS("ip6t_devgroup"); + +static bool devgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_devgroup_info *info = par->matchinfo; + + if (info->flags & XT_DEVGROUP_MATCH_SRC && + (((info->src_group ^ par->in->group) & info->src_mask ? 1 : 0) ^ + ((info->flags & XT_DEVGROUP_INVERT_SRC) ? 1 : 0))) + return false; + + if (info->flags & XT_DEVGROUP_MATCH_DST && + (((info->dst_group ^ par->out->group) & info->dst_mask ? 1 : 0) ^ + ((info->flags & XT_DEVGROUP_INVERT_DST) ? 1 : 0))) + return false; + + return true; +} + +static int devgroup_mt_checkentry(const struct xt_mtchk_param *par) +{ + const struct xt_devgroup_info *info = par->matchinfo; + + if (info->flags & ~(XT_DEVGROUP_MATCH_SRC | XT_DEVGROUP_INVERT_SRC | + XT_DEVGROUP_MATCH_DST | XT_DEVGROUP_INVERT_DST)) + return -EINVAL; + + if (info->flags & XT_DEVGROUP_MATCH_SRC && + par->hook_mask & ~((1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD))) + return -EINVAL; + + if (info->flags & XT_DEVGROUP_MATCH_DST && + par->hook_mask & ~((1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING))) + return -EINVAL; + + return 0; +} + +static struct xt_match devgroup_mt_reg __read_mostly = { + .name = "devgroup", + .match = devgroup_mt, + .checkentry = devgroup_mt_checkentry, + .matchsize = sizeof(struct xt_devgroup_info), + .family = NFPROTO_UNSPEC, + .me = THIS_MODULE +}; + +static int __init devgroup_mt_init(void) +{ + return xt_register_match(&devgroup_mt_reg); +} + +static void __exit devgroup_mt_exit(void) +{ + xt_unregister_match(&devgroup_mt_reg); +} + +module_init(devgroup_mt_init); +module_exit(devgroup_mt_exit); -- cgit v1.1 From 442b9635c569fef038d5367a7acd906db4677ae1 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Wed, 2 Feb 2011 17:05:11 -0800 Subject: tcp: Increase the initial congestion window to 10. Signed-off-by: David S. Miller Acked-by: Nandita Dukkipati --- net/dccp/ccids/ccid2.c | 9 +++++++++ net/ipv4/tcp_input.c | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index e96d5e8..fadecd2 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -583,6 +583,15 @@ done: dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks); } +/* + * Convert RFC 3390 larger initial window into an equivalent number of packets. + * This is based on the numbers specified in RFC 5681, 3.1. + */ +static inline u32 rfc3390_bytes_to_packets(const u32 smss) +{ + return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3); +} + static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) { struct ccid2_hc_tx_sock *hc = ccid_priv(ccid); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index eb7f82e..2f692ce 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -817,7 +817,7 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); if (!cwnd) - cwnd = rfc3390_bytes_to_packets(tp->mss_cache); + cwnd = TCP_INIT_CWND; return min_t(__u32, cwnd, tp->snd_cwnd_clamp); } -- cgit v1.1 From b299e4f001cfa16205f9121f4630970049652268 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 2 Feb 2011 20:48:10 -0800 Subject: ipv4: Fix fib_trie build in some configurations. If we end up including include/linux/node.h (either explicitly or implicitly) that header has a definition of "struct node" too. So rename the one we use in fib_trie to "rt_trie_node" to avoid the conflict. Reported-by: Stephen Rothwell Signed-off-by: David S.
Miller --- net/ipv4/fib_trie.c | 120 ++++++++++++++++++++++++++-------------------------- 1 file changed, 60 insertions(+), 60 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 73cb984..1eae90b 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -95,7 +95,7 @@ typedef unsigned int t_key; #define IS_TNODE(n) (!(n->parent & T_LEAF)) #define IS_LEAF(n) (n->parent & T_LEAF) -struct node { +struct rt_trie_node { unsigned long parent; t_key key; }; @@ -126,7 +126,7 @@ struct tnode { struct work_struct work; struct tnode *tnode_free; }; - struct node *child[0]; + struct rt_trie_node *child[0]; }; #ifdef CONFIG_IP_FIB_TRIE_STATS @@ -151,16 +151,16 @@ struct trie_stat { }; struct trie { - struct node *trie; + struct rt_trie_node *trie; #ifdef CONFIG_IP_FIB_TRIE_STATS struct trie_use_stats stats; #endif }; -static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n); -static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, +static void put_child(struct trie *t, struct tnode *tn, int i, struct rt_trie_node *n); +static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, int wasfull); -static struct node *resize(struct trie *t, struct tnode *tn); +static struct rt_trie_node *resize(struct trie *t, struct tnode *tn); static struct tnode *inflate(struct trie *t, struct tnode *tn); static struct tnode *halve(struct trie *t, struct tnode *tn); /* tnodes to free after resize(); protected by RTNL */ @@ -177,12 +177,12 @@ static const int sync_pages = 128; static struct kmem_cache *fn_alias_kmem __read_mostly; static struct kmem_cache *trie_leaf_kmem __read_mostly; -static inline struct tnode *node_parent(struct node *node) +static inline struct tnode *node_parent(struct rt_trie_node *node) { return (struct tnode *)(node->parent & ~NODE_TYPE_MASK); } -static inline struct tnode *node_parent_rcu(struct node *node) +static inline struct tnode *node_parent_rcu(struct 
rt_trie_node *node) { struct tnode *ret = node_parent(node); @@ -192,22 +192,22 @@ static inline struct tnode *node_parent_rcu(struct node *node) /* Same as rcu_assign_pointer * but that macro() assumes that value is a pointer. */ -static inline void node_set_parent(struct node *node, struct tnode *ptr) +static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr) { smp_wmb(); node->parent = (unsigned long)ptr | NODE_TYPE(node); } -static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i) +static inline struct rt_trie_node *tnode_get_child(struct tnode *tn, unsigned int i) { BUG_ON(i >= 1U << tn->bits); return tn->child[i]; } -static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) +static inline struct rt_trie_node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) { - struct node *ret = tnode_get_child(tn, i); + struct rt_trie_node *ret = tnode_get_child(tn, i); return rcu_dereference_rtnl(ret); } @@ -378,7 +378,7 @@ static void __tnode_free_rcu(struct rcu_head *head) { struct tnode *tn = container_of(head, struct tnode, rcu); size_t size = sizeof(struct tnode) + - (sizeof(struct node *) << tn->bits); + (sizeof(struct rt_trie_node *) << tn->bits); if (size <= PAGE_SIZE) kfree(tn); @@ -402,7 +402,7 @@ static void tnode_free_safe(struct tnode *tn) tn->tnode_free = tnode_free_head; tnode_free_head = tn; tnode_free_size += sizeof(struct tnode) + - (sizeof(struct node *) << tn->bits); + (sizeof(struct rt_trie_node *) << tn->bits); } static void tnode_free_flush(void) @@ -443,7 +443,7 @@ static struct leaf_info *leaf_info_new(int plen) static struct tnode *tnode_new(t_key key, int pos, int bits) { - size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits); + size_t sz = sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits); struct tnode *tn = tnode_alloc(sz); if (tn) { @@ -456,7 +456,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) } pr_debug("AT %p s=%zu %zu\n", tn, 
sizeof(struct tnode), - sizeof(struct node) << bits); + sizeof(struct rt_trie_node) << bits); return tn; } @@ -465,7 +465,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) * and no bits are skipped. See discussion in dyntree paper p. 6 */ -static inline int tnode_full(const struct tnode *tn, const struct node *n) +static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *n) { if (n == NULL || IS_LEAF(n)) return 0; @@ -474,7 +474,7 @@ static inline int tnode_full(const struct tnode *tn, const struct node *n) } static inline void put_child(struct trie *t, struct tnode *tn, int i, - struct node *n) + struct rt_trie_node *n) { tnode_put_child_reorg(tn, i, n, -1); } @@ -484,10 +484,10 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i, * Update the value of full_children and empty_children. */ -static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, +static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, int wasfull) { - struct node *chi = tn->child[i]; + struct rt_trie_node *chi = tn->child[i]; int isfull; BUG_ON(i >= 1<bits); @@ -515,7 +515,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, } #define MAX_WORK 10 -static struct node *resize(struct trie *t, struct tnode *tn) +static struct rt_trie_node *resize(struct trie *t, struct tnode *tn) { int i; struct tnode *old_tn; @@ -605,7 +605,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) /* Keep root node larger */ - if (!node_parent((struct node *)tn)) { + if (!node_parent((struct rt_trie_node *)tn)) { inflate_threshold_use = inflate_threshold_root; halve_threshold_use = halve_threshold_root; } else { @@ -635,7 +635,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) /* Return if at least one inflate is run */ if (max_work != MAX_WORK) - return (struct node *) tn; + return (struct rt_trie_node *) tn; /* * Halve as long as the number of empty children in 
this @@ -663,7 +663,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) if (tn->empty_children == tnode_child_length(tn) - 1) { one_child: for (i = 0; i < tnode_child_length(tn); i++) { - struct node *n; + struct rt_trie_node *n; n = tn->child[i]; if (!n) @@ -676,7 +676,7 @@ one_child: return n; } } - return (struct node *) tn; + return (struct rt_trie_node *) tn; } static struct tnode *inflate(struct trie *t, struct tnode *tn) @@ -723,14 +723,14 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) goto nomem; } - put_child(t, tn, 2*i, (struct node *) left); - put_child(t, tn, 2*i+1, (struct node *) right); + put_child(t, tn, 2*i, (struct rt_trie_node *) left); + put_child(t, tn, 2*i+1, (struct rt_trie_node *) right); } } for (i = 0; i < olen; i++) { struct tnode *inode; - struct node *node = tnode_get_child(oldtnode, i); + struct rt_trie_node *node = tnode_get_child(oldtnode, i); struct tnode *left, *right; int size, j; @@ -825,7 +825,7 @@ nomem: static struct tnode *halve(struct trie *t, struct tnode *tn) { struct tnode *oldtnode = tn; - struct node *left, *right; + struct rt_trie_node *left, *right; int i; int olen = tnode_child_length(tn); @@ -856,7 +856,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) if (!newn) goto nomem; - put_child(t, tn, i/2, (struct node *)newn); + put_child(t, tn, i/2, (struct rt_trie_node *)newn); } } @@ -958,7 +958,7 @@ fib_find_node(struct trie *t, u32 key) { int pos; struct tnode *tn; - struct node *n; + struct rt_trie_node *n; pos = 0; n = rcu_dereference_rtnl(t->trie); @@ -993,17 +993,17 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) key = tn->key; - while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { + while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); tn = (struct tnode *) resize(t, (struct tnode *)tn); 
tnode_put_child_reorg((struct tnode *)tp, cindex, - (struct node *)tn, wasfull); + (struct rt_trie_node *)tn, wasfull); - tp = node_parent((struct node *) tn); + tp = node_parent((struct rt_trie_node *) tn); if (!tp) - rcu_assign_pointer(t->trie, (struct node *)tn); + rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); tnode_free_flush(); if (!tp) @@ -1015,7 +1015,7 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) if (IS_TNODE(tn)) tn = (struct tnode *)resize(t, (struct tnode *)tn); - rcu_assign_pointer(t->trie, (struct node *)tn); + rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); tnode_free_flush(); } @@ -1025,7 +1025,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) { int pos, newpos; struct tnode *tp = NULL, *tn = NULL; - struct node *n; + struct rt_trie_node *n; struct leaf *l; int missbit; struct list_head *fa_head = NULL; @@ -1111,10 +1111,10 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) if (t->trie && n == NULL) { /* Case 2: n is NULL, and will just insert a new leaf */ - node_set_parent((struct node *)l, tp); + node_set_parent((struct rt_trie_node *)l, tp); cindex = tkey_extract_bits(key, tp->pos, tp->bits); - put_child(t, (struct tnode *)tp, cindex, (struct node *)l); + put_child(t, (struct tnode *)tp, cindex, (struct rt_trie_node *)l); } else { /* Case 3: n is a LEAF or a TNODE and the key doesn't match. 
*/ /* @@ -1141,18 +1141,18 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) return NULL; } - node_set_parent((struct node *)tn, tp); + node_set_parent((struct rt_trie_node *)tn, tp); missbit = tkey_extract_bits(key, newpos, 1); - put_child(t, tn, missbit, (struct node *)l); + put_child(t, tn, missbit, (struct rt_trie_node *)l); put_child(t, tn, 1-missbit, n); if (tp) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); put_child(t, (struct tnode *)tp, cindex, - (struct node *)tn); + (struct rt_trie_node *)tn); } else { - rcu_assign_pointer(t->trie, (struct node *)tn); + rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); tp = tn; } } @@ -1376,7 +1376,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, { struct trie *t = (struct trie *) tb->tb_data; int ret; - struct node *n; + struct rt_trie_node *n; struct tnode *pn; int pos, bits; t_key key = ntohl(flp->fl4_dst); @@ -1541,7 +1541,7 @@ backtrace: if (chopped_off <= pn->bits) { cindex &= ~(1 << (chopped_off-1)); } else { - struct tnode *parent = node_parent_rcu((struct node *) pn); + struct tnode *parent = node_parent_rcu((struct rt_trie_node *) pn); if (!parent) goto failed; @@ -1568,7 +1568,7 @@ found: */ static void trie_leaf_remove(struct trie *t, struct leaf *l) { - struct tnode *tp = node_parent((struct node *) l); + struct tnode *tp = node_parent((struct rt_trie_node *) l); pr_debug("entering trie_leaf_remove(%p)\n", l); @@ -1706,7 +1706,7 @@ static int trie_flush_leaf(struct leaf *l) * Scan for the next right leaf starting at node p->child[idx] * Since we have back pointer, no recursion necessary. 
*/ -static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c) +static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) { do { t_key idx; @@ -1732,7 +1732,7 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c) } /* Node empty, walk back up to parent */ - c = (struct node *) p; + c = (struct rt_trie_node *) p; } while ((p = node_parent_rcu(c)) != NULL); return NULL; /* Root of trie */ @@ -1753,7 +1753,7 @@ static struct leaf *trie_firstleaf(struct trie *t) static struct leaf *trie_nextleaf(struct leaf *l) { - struct node *c = (struct node *) l; + struct rt_trie_node *c = (struct rt_trie_node *) l; struct tnode *p = node_parent_rcu(c); if (!p) @@ -1961,7 +1961,7 @@ struct fib_trie_iter { unsigned int depth; }; -static struct node *fib_trie_get_next(struct fib_trie_iter *iter) +static struct rt_trie_node *fib_trie_get_next(struct fib_trie_iter *iter) { struct tnode *tn = iter->tnode; unsigned int cindex = iter->index; @@ -1975,7 +1975,7 @@ static struct node *fib_trie_get_next(struct fib_trie_iter *iter) iter->tnode, iter->index, iter->depth); rescan: while (cindex < (1<bits)) { - struct node *n = tnode_get_child_rcu(tn, cindex); + struct rt_trie_node *n = tnode_get_child_rcu(tn, cindex); if (n) { if (IS_LEAF(n)) { @@ -1994,7 +1994,7 @@ rescan: } /* Current node exhausted, pop back up */ - p = node_parent_rcu((struct node *)tn); + p = node_parent_rcu((struct rt_trie_node *)tn); if (p) { cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; tn = p; @@ -2006,10 +2006,10 @@ rescan: return NULL; } -static struct node *fib_trie_get_first(struct fib_trie_iter *iter, +static struct rt_trie_node *fib_trie_get_first(struct fib_trie_iter *iter, struct trie *t) { - struct node *n; + struct rt_trie_node *n; if (!t) return NULL; @@ -2033,7 +2033,7 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter, static void trie_collect_stats(struct trie *t, struct trie_stat *s) { - struct node *n; + struct rt_trie_node *n; struct 
fib_trie_iter iter; memset(s, 0, sizeof(*s)); @@ -2106,7 +2106,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) seq_putc(seq, '\n'); seq_printf(seq, "\tPointers: %u\n", pointers); - bytes += sizeof(struct node *) * pointers; + bytes += sizeof(struct rt_trie_node *) * pointers; seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024); } @@ -2187,7 +2187,7 @@ static const struct file_operations fib_triestat_fops = { .release = single_release_net, }; -static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) +static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) { struct fib_trie_iter *iter = seq->private; struct net *net = seq_file_net(seq); @@ -2200,7 +2200,7 @@ static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos) struct fib_table *tb; hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { - struct node *n; + struct rt_trie_node *n; for (n = fib_trie_get_first(iter, (struct trie *) tb->tb_data); @@ -2229,7 +2229,7 @@ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct fib_table *tb = iter->tb; struct hlist_node *tb_node; unsigned int h; - struct node *n; + struct rt_trie_node *n; ++*pos; /* next node in same table */ @@ -2315,7 +2315,7 @@ static inline const char *rtn_type(char *buf, size_t len, unsigned int t) static int fib_trie_seq_show(struct seq_file *seq, void *v) { const struct fib_trie_iter *iter = seq->private; - struct node *n = v; + struct rt_trie_node *n = v; if (!node_parent_rcu(n)) fib_table_print(seq, iter->tb); -- cgit v1.1 From 119b3d386985fcd477b3131190c041516a73f83a Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 2 Feb 2011 15:19:51 +0000 Subject: sfq: deadlock in error path The change to allow divisor to be a parameter (in 2.6.38-rc1) commit 817fb15dfd988d8dda916ee04fa506f0c466b9d6 introduced a possible deadlock caught by sparse. 
The scheduler tree lock was left locked in the case of an incorrect divisor value. Simplest fix is to move test outside of lock which also solves problem of partial update. Signed-off-by: Stephen Hemminger Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_sfq.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 4cff442..c2e628d 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -491,17 +491,18 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) if (opt->nla_len < nla_attr_size(sizeof(*ctl))) return -EINVAL; + if (ctl->divisor && + (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)) + return -EINVAL; + sch_tree_lock(sch); q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch)); q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); q->perturb_period = ctl->perturb_period * HZ; if (ctl->limit) q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); - if (ctl->divisor) { - if (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536) - return -EINVAL; + if (ctl->divisor) q->divisor = ctl->divisor; - } qlen = sch->q.qlen; while (sch->q.qlen > q->limit) sfq_drop(sch); -- cgit v1.1 From 45e144339ac59971eb44be32e1282760aaabe861 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 2 Feb 2011 15:21:10 +0000 Subject: sched: CHOKe flow scheduler CHOKe ("CHOose and Kill" or "CHOose and Keep") is an alternative packet scheduler based on the Random Exponential Drop (RED) algorithm. 
The core idea is: For every packet arrival: Calculate Qave if (Qave < minth) Queue the new packet else Select randomly a packet from the queue if (both packets from same flow) then Drop both the packets else if (Qave > maxth) Drop packet else Admit packet with probability p (same as RED) See also: Rong Pan, Balaji Prabhakar, Konstantinos Psounis, "CHOKe: a stateless active queue management scheme for approximating fair bandwidth allocation", Proceeding of INFOCOM'2000, March 2000. Help from: Eric Dumazet Patrick McHardy Signed-off-by: Stephen Hemminger Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/Kconfig | 11 + net/sched/Makefile | 2 + net/sched/sch_choke.c | 676 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 689 insertions(+) create mode 100644 net/sched/sch_choke.c (limited to 'net') diff --git a/net/sched/Kconfig b/net/sched/Kconfig index e318f45..8c19b6e 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -217,6 +217,17 @@ config NET_SCH_MQPRIO If unsure, say N. +config NET_SCH_CHOKE + tristate "CHOose and Keep responsive flow scheduler (CHOKE)" + help + Say Y here if you want to use the CHOKe packet scheduler (CHOose + and Keep for responsive flows, CHOose and Kill for unresponsive + flows). This is a variation of RED which trys to penalize flows + that monopolize the queue. + + To compile this code as a module, choose M here: the + module will be called sch_choke.
+ config NET_SCH_INGRESS tristate "Ingress Qdisc" depends on NET_CLS_ACT diff --git a/net/sched/Makefile b/net/sched/Makefile index 26ce681..06c6cdf 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -33,6 +33,8 @@ obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o +obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o + obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o obj-$(CONFIG_NET_CLS_FW) += cls_fw.o diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c new file mode 100644 index 0000000..a1cec18 --- /dev/null +++ b/net/sched/sch_choke.c @@ -0,0 +1,676 @@ +/* + * net/sched/sch_choke.c CHOKE scheduler + * + * Copyright (c) 2011 Stephen Hemminger + * Copyright (c) 2011 Eric Dumazet + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + CHOKe stateless AQM for fair bandwidth allocation + ================================================= + + CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for + unresponsive flows) is a variant of RED that penalizes misbehaving flows but + maintains no flow state. The difference from RED is an additional step + during the enqueuing process. If average queue size is over the + low threshold (qmin), a packet is chosen at random from the queue. + If both the new and chosen packet are from the same flow, both + are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it + needs to access packets in queue randomly. It has a minimal class + interface to allow overriding the builtin flow classifier with + filters. + + Source: + R. Pan, B. Prabhakar, and K. 
Psounis, "CHOKe, A Stateless + Active Queue Management Scheme for Approximating Fair Bandwidth Allocation", + IEEE INFOCOM, 2000. + + A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial + Characteristics", IEEE/ACM Transactions on Networking, 2004 + + */ + +/* Upper bound on size of sk_buff table (packets) */ +#define CHOKE_MAX_QUEUE (128*1024 - 1) + +struct choke_sched_data { +/* Parameters */ + u32 limit; + unsigned char flags; + + struct red_parms parms; + +/* Variables */ + struct tcf_proto *filter_list; + struct { + u32 prob_drop; /* Early probability drops */ + u32 prob_mark; /* Early probability marks */ + u32 forced_drop; /* Forced drops, qavg > max_thresh */ + u32 forced_mark; /* Forced marks, qavg > max_thresh */ + u32 pdrop; /* Drops due to queue limits */ + u32 other; /* Drops due to drop() calls */ + u32 matched; /* Drops to flow match */ + } stats; + + unsigned int head; + unsigned int tail; + + unsigned int tab_mask; /* size - 1 */ + + struct sk_buff **tab; +}; + +/* deliver a random number between 0 and N - 1 */ +static u32 random_N(unsigned int N) +{ + return reciprocal_divide(random32(), N); +} + +/* number of elements in queue including holes */ +static unsigned int choke_len(const struct choke_sched_data *q) +{ + return (q->tail - q->head) & q->tab_mask; +} + +/* Is ECN parameter configured */ +static int use_ecn(const struct choke_sched_data *q) +{ + return q->flags & TC_RED_ECN; +} + +/* Should packets over max just be dropped (versus marked) */ +static int use_harddrop(const struct choke_sched_data *q) +{ + return q->flags & TC_RED_HARDDROP; +} + +/* Move head pointer forward to skip over holes */ +static void choke_zap_head_holes(struct choke_sched_data *q) +{ + do { + q->head = (q->head + 1) & q->tab_mask; + if (q->head == q->tail) + break; + } while (q->tab[q->head] == NULL); +} + +/* Move tail pointer backwards to reuse holes */ +static void choke_zap_tail_holes(struct choke_sched_data *q) +{ + do { + q->tail = (q->tail 
- 1) & q->tab_mask; + if (q->head == q->tail) + break; + } while (q->tab[q->tail] == NULL); +} + +/* Drop packet from queue array by creating a "hole" */ +static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx) +{ + struct choke_sched_data *q = qdisc_priv(sch); + struct sk_buff *skb = q->tab[idx]; + + q->tab[idx] = NULL; + + if (idx == q->head) + choke_zap_head_holes(q); + if (idx == q->tail) + choke_zap_tail_holes(q); + + sch->qstats.backlog -= qdisc_pkt_len(skb); + qdisc_drop(skb, sch); + qdisc_tree_decrease_qlen(sch, 1); + --sch->q.qlen; +} + +/* + * Compare flow of two packets + * Returns true only if source and destination address and port match. + * false for special cases + */ +static bool choke_match_flow(struct sk_buff *skb1, + struct sk_buff *skb2) +{ + int off1, off2, poff; + const u32 *ports1, *ports2; + u8 ip_proto; + __u32 hash1; + + if (skb1->protocol != skb2->protocol) + return false; + + /* Use hash value as quick check + * Assumes that __skb_get_rxhash makes IP header and ports linear + */ + hash1 = skb_get_rxhash(skb1); + if (!hash1 || hash1 != skb_get_rxhash(skb2)) + return false; + + /* Probably match, but be sure to avoid hash collisions */ + off1 = skb_network_offset(skb1); + off2 = skb_network_offset(skb2); + + switch (skb1->protocol) { + case __constant_htons(ETH_P_IP): { + const struct iphdr *ip1, *ip2; + + ip1 = (const struct iphdr *) (skb1->data + off1); + ip2 = (const struct iphdr *) (skb2->data + off2); + + ip_proto = ip1->protocol; + if (ip_proto != ip2->protocol || + ip1->saddr != ip2->saddr || ip1->daddr != ip2->daddr) + return false; + + if ((ip1->frag_off | ip2->frag_off) & htons(IP_MF | IP_OFFSET)) + ip_proto = 0; + off1 += ip1->ihl * 4; + off2 += ip2->ihl * 4; + break; + } + + case __constant_htons(ETH_P_IPV6): { + const struct ipv6hdr *ip1, *ip2; + + ip1 = (const struct ipv6hdr *) (skb1->data + off1); + ip2 = (const struct ipv6hdr *) (skb2->data + off2); + + ip_proto = ip1->nexthdr; + if (ip_proto != ip2->nexthdr || 
+ ipv6_addr_cmp(&ip1->saddr, &ip2->saddr) || + ipv6_addr_cmp(&ip1->daddr, &ip2->daddr)) + return false; + off1 += 40; + off2 += 40; + } + + default: /* Maybe compare MAC header here? */ + return false; + } + + poff = proto_ports_offset(ip_proto); + if (poff < 0) + return true; + + off1 += poff; + off2 += poff; + + ports1 = (__force u32 *)(skb1->data + off1); + ports2 = (__force u32 *)(skb2->data + off2); + return *ports1 == *ports2; +} + +static inline void choke_set_classid(struct sk_buff *skb, u16 classid) +{ + *(unsigned int *)(qdisc_skb_cb(skb)->data) = classid; +} + +static u16 choke_get_classid(const struct sk_buff *skb) +{ + return *(unsigned int *)(qdisc_skb_cb(skb)->data); +} + +/* + * Classify flow using either: + * 1. pre-existing classification result in skb + * 2. fast internal classification + * 3. use TC filter based classification + */ +static bool choke_classify(struct sk_buff *skb, + struct Qdisc *sch, int *qerr) + +{ + struct choke_sched_data *q = qdisc_priv(sch); + struct tcf_result res; + int result; + + result = tc_classify(skb, q->filter_list, &res); + if (result >= 0) { +#ifdef CONFIG_NET_CLS_ACT + switch (result) { + case TC_ACT_STOLEN: + case TC_ACT_QUEUED: + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + case TC_ACT_SHOT: + return false; + } +#endif + choke_set_classid(skb, TC_H_MIN(res.classid)); + return true; + } + + return false; +} + +/* + * Select a packet at random from queue + * HACK: since queue can have holes from previous deletion; retry several + * times to find a random skb but then just give up and return the head + * Will return NULL if queue is empty (q->head == q->tail) + */ +static struct sk_buff *choke_peek_random(const struct choke_sched_data *q, + unsigned int *pidx) +{ + struct sk_buff *skb; + int retrys = 3; + + do { + *pidx = (q->head + random_N(choke_len(q))) & q->tab_mask; + skb = q->tab[*pidx]; + if (skb) + return skb; + } while (--retrys > 0); + + return q->tab[*pidx = q->head]; +} + +/* + * Compare new packet 
with random packet in queue + * returns true if matched and sets *pidx + */ +static bool choke_match_random(const struct choke_sched_data *q, + struct sk_buff *nskb, + unsigned int *pidx) +{ + struct sk_buff *oskb; + + if (q->head == q->tail) + return false; + + oskb = choke_peek_random(q, pidx); + if (q->filter_list) + return choke_get_classid(nskb) == choke_get_classid(oskb); + + return choke_match_flow(oskb, nskb); +} + +static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct choke_sched_data *q = qdisc_priv(sch); + struct red_parms *p = &q->parms; + int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; + + if (q->filter_list) { + /* If using external classifiers, get result and record it. */ + if (!choke_classify(skb, sch, &ret)) + goto other_drop; /* Packet was eaten by filter */ + } + + /* Compute average queue usage (see RED) */ + p->qavg = red_calc_qavg(p, sch->q.qlen); + if (red_is_idling(p)) + red_end_of_idle_period(p); + + /* Is queue small? */ + if (p->qavg <= p->qth_min) + p->qcount = -1; + else { + unsigned int idx; + + /* Draw a packet at random from queue and compare flow */ + if (choke_match_random(q, skb, &idx)) { + q->stats.matched++; + choke_drop_by_idx(sch, idx); + goto congestion_drop; + } + + /* Queue is large, always mark/drop */ + if (p->qavg > p->qth_max) { + p->qcount = -1; + + sch->qstats.overlimits++; + if (use_harddrop(q) || !use_ecn(q) || + !INET_ECN_set_ce(skb)) { + q->stats.forced_drop++; + goto congestion_drop; + } + + q->stats.forced_mark++; + } else if (++p->qcount) { + if (red_mark_probability(p, p->qavg)) { + p->qcount = 0; + p->qR = red_random(p); + + sch->qstats.overlimits++; + if (!use_ecn(q) || !INET_ECN_set_ce(skb)) { + q->stats.prob_drop++; + goto congestion_drop; + } + + q->stats.prob_mark++; + } + } else + p->qR = red_random(p); + } + + /* Admit new packet */ + if (sch->q.qlen < q->limit) { + q->tab[q->tail] = skb; + q->tail = (q->tail + 1) & q->tab_mask; + ++sch->q.qlen; + sch->qstats.backlog += 
qdisc_pkt_len(skb); + return NET_XMIT_SUCCESS; + } + + q->stats.pdrop++; + sch->qstats.drops++; + kfree_skb(skb); + return NET_XMIT_DROP; + + congestion_drop: + qdisc_drop(skb, sch); + return NET_XMIT_CN; + + other_drop: + if (ret & __NET_XMIT_BYPASS) + sch->qstats.drops++; + kfree_skb(skb); + return ret; +} + +static struct sk_buff *choke_dequeue(struct Qdisc *sch) +{ + struct choke_sched_data *q = qdisc_priv(sch); + struct sk_buff *skb; + + if (q->head == q->tail) { + if (!red_is_idling(&q->parms)) + red_start_of_idle_period(&q->parms); + return NULL; + } + + skb = q->tab[q->head]; + q->tab[q->head] = NULL; + choke_zap_head_holes(q); + --sch->q.qlen; + sch->qstats.backlog -= qdisc_pkt_len(skb); + qdisc_bstats_update(sch, skb); + + return skb; +} + +static unsigned int choke_drop(struct Qdisc *sch) +{ + struct choke_sched_data *q = qdisc_priv(sch); + unsigned int len; + + len = qdisc_queue_drop(sch); + if (len > 0) + q->stats.other++; + else { + if (!red_is_idling(&q->parms)) + red_start_of_idle_period(&q->parms); + } + + return len; +} + +static void choke_reset(struct Qdisc *sch) +{ + struct choke_sched_data *q = qdisc_priv(sch); + + red_restart(&q->parms); +} + +static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = { + [TCA_CHOKE_PARMS] = { .len = sizeof(struct tc_red_qopt) }, + [TCA_CHOKE_STAB] = { .len = RED_STAB_SIZE }, +}; + + +static void choke_free(void *addr) +{ + if (addr) { + if (is_vmalloc_addr(addr)) + vfree(addr); + else + kfree(addr); + } +} + +static int choke_change(struct Qdisc *sch, struct nlattr *opt) +{ + struct choke_sched_data *q = qdisc_priv(sch); + struct nlattr *tb[TCA_CHOKE_MAX + 1]; + const struct tc_red_qopt *ctl; + int err; + struct sk_buff **old = NULL; + unsigned int mask; + + if (opt == NULL) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy); + if (err < 0) + return err; + + if (tb[TCA_CHOKE_PARMS] == NULL || + tb[TCA_CHOKE_STAB] == NULL) + return -EINVAL; + + ctl = 
nla_data(tb[TCA_CHOKE_PARMS]); + + if (ctl->limit > CHOKE_MAX_QUEUE) + return -EINVAL; + + mask = roundup_pow_of_two(ctl->limit + 1) - 1; + if (mask != q->tab_mask) { + struct sk_buff **ntab; + + ntab = kcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL); + if (!ntab) + ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *)); + if (!ntab) + return -ENOMEM; + + sch_tree_lock(sch); + old = q->tab; + if (old) { + unsigned int oqlen = sch->q.qlen, tail = 0; + + while (q->head != q->tail) { + struct sk_buff *skb = q->tab[q->head]; + + q->head = (q->head + 1) & q->tab_mask; + if (!skb) + continue; + if (tail < mask) { + ntab[tail++] = skb; + continue; + } + sch->qstats.backlog -= qdisc_pkt_len(skb); + --sch->q.qlen; + qdisc_drop(skb, sch); + } + qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen); + q->head = 0; + q->tail = tail; + } + + q->tab_mask = mask; + q->tab = ntab; + } else + sch_tree_lock(sch); + + q->flags = ctl->flags; + q->limit = ctl->limit; + + red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, + ctl->Plog, ctl->Scell_log, + nla_data(tb[TCA_CHOKE_STAB])); + + if (q->head == q->tail) + red_end_of_idle_period(&q->parms); + + sch_tree_unlock(sch); + choke_free(old); + return 0; +} + +static int choke_init(struct Qdisc *sch, struct nlattr *opt) +{ + return choke_change(sch, opt); +} + +static int choke_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct choke_sched_data *q = qdisc_priv(sch); + struct nlattr *opts = NULL; + struct tc_red_qopt opt = { + .limit = q->limit, + .flags = q->flags, + .qth_min = q->parms.qth_min >> q->parms.Wlog, + .qth_max = q->parms.qth_max >> q->parms.Wlog, + .Wlog = q->parms.Wlog, + .Plog = q->parms.Plog, + .Scell_log = q->parms.Scell_log, + }; + + opts = nla_nest_start(skb, TCA_OPTIONS); + if (opts == NULL) + goto nla_put_failure; + + NLA_PUT(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt); + return nla_nest_end(skb, opts); + +nla_put_failure: + nla_nest_cancel(skb, opts); + return -EMSGSIZE; +} + +static int 
choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d) +{ + struct choke_sched_data *q = qdisc_priv(sch); + struct tc_choke_xstats st = { + .early = q->stats.prob_drop + q->stats.forced_drop, + .marked = q->stats.prob_mark + q->stats.forced_mark, + .pdrop = q->stats.pdrop, + .other = q->stats.other, + .matched = q->stats.matched, + }; + + return gnet_stats_copy_app(d, &st, sizeof(st)); +} + +static void choke_destroy(struct Qdisc *sch) +{ + struct choke_sched_data *q = qdisc_priv(sch); + + tcf_destroy_chain(&q->filter_list); + choke_free(q->tab); +} + +static struct Qdisc *choke_leaf(struct Qdisc *sch, unsigned long arg) +{ + return NULL; +} + +static unsigned long choke_get(struct Qdisc *sch, u32 classid) +{ + return 0; +} + +static void choke_put(struct Qdisc *q, unsigned long cl) +{ +} + +static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent, + u32 classid) +{ + return 0; +} + +static struct tcf_proto **choke_find_tcf(struct Qdisc *sch, unsigned long cl) +{ + struct choke_sched_data *q = qdisc_priv(sch); + + if (cl) + return NULL; + return &q->filter_list; +} + +static int choke_dump_class(struct Qdisc *sch, unsigned long cl, + struct sk_buff *skb, struct tcmsg *tcm) +{ + tcm->tcm_handle |= TC_H_MIN(cl); + return 0; +} + +static void choke_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + if (!arg->stop) { + if (arg->fn(sch, 1, arg) < 0) { + arg->stop = 1; + return; + } + arg->count++; + } +} + +static const struct Qdisc_class_ops choke_class_ops = { + .leaf = choke_leaf, + .get = choke_get, + .put = choke_put, + .tcf_chain = choke_find_tcf, + .bind_tcf = choke_bind, + .unbind_tcf = choke_put, + .dump = choke_dump_class, + .walk = choke_walk, +}; + +static struct sk_buff *choke_peek_head(struct Qdisc *sch) +{ + struct choke_sched_data *q = qdisc_priv(sch); + + return (q->head != q->tail) ? 
q->tab[q->head] : NULL; +} + +static struct Qdisc_ops choke_qdisc_ops __read_mostly = { + .id = "choke", + .priv_size = sizeof(struct choke_sched_data), + + .enqueue = choke_enqueue, + .dequeue = choke_dequeue, + .peek = choke_peek_head, + .drop = choke_drop, + .init = choke_init, + .destroy = choke_destroy, + .reset = choke_reset, + .change = choke_change, + .dump = choke_dump, + .dump_stats = choke_dump_stats, + .owner = THIS_MODULE, +}; + +static int __init choke_module_init(void) +{ + return register_qdisc(&choke_qdisc_ops); +} + +static void __exit choke_module_exit(void) +{ + unregister_qdisc(&choke_qdisc_ops); +} + +module_init(choke_module_init) +module_exit(choke_module_exit) + +MODULE_LICENSE("GPL"); -- cgit v1.1 From cdfb74d4c2e3bcc9383121af2591d0ae15007ba7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 2 Feb 2011 23:06:31 -0800 Subject: sch_choke: Need linux/vmalloc.h Signed-off-by: David S. Miller --- net/sched/sch_choke.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index a1cec18..ee1e209 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include -- cgit v1.1 From 8525d6f84f576402278a552ed17d2ba3b61f8e3c Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 3 Feb 2011 07:22:43 +0900 Subject: IPVS: Use correct lock in SCTP module Use sctp_app_lock instead of tcp_app_lock in the SCTP protocol module. This appears to be a typo introduced by the netns changes. 
Signed-off-by: Simon Horman Signed-off-by: Hans Schillstrom --- net/netfilter/ipvs/ip_vs_proto_sctp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index fb2d04a..b027ccc 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -1101,7 +1101,7 @@ static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) struct netns_ipvs *ipvs = net_ipvs(net); ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE); - spin_lock_init(&ipvs->tcp_app_lock); + spin_lock_init(&ipvs->sctp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, sizeof(sctp_timeouts)); } -- cgit v1.1 From eeabee7e53f6fb5e63027519982b19616e8f166e Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Fri, 28 Jan 2011 10:20:47 -0800 Subject: mac80211: Be more careful when changing channels. If we cannot set the channel type, set the channel back to the original. Don't update the driver hardware if nothing actually changed. Signed-off-by: Ben Greear Signed-off-by: John W. 
Linville --- net/mac80211/cfg.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 4bc8a92..88b0884 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1215,6 +1215,9 @@ static int ieee80211_set_channel(struct wiphy *wiphy, { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = NULL; + struct ieee80211_channel *old_oper; + enum nl80211_channel_type old_oper_type; + enum nl80211_channel_type old_vif_oper_type= NL80211_CHAN_NO_HT; if (netdev) sdata = IEEE80211_DEV_TO_SUB_IF(netdev); @@ -1232,13 +1235,23 @@ static int ieee80211_set_channel(struct wiphy *wiphy, break; } - local->oper_channel = chan; + if (sdata) + old_vif_oper_type = sdata->vif.bss_conf.channel_type; + old_oper_type = local->_oper_channel_type; if (!ieee80211_set_channel_type(local, sdata, channel_type)) return -EBUSY; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); - if (sdata && sdata->vif.type != NL80211_IFTYPE_MONITOR) + old_oper = local->oper_channel; + local->oper_channel = chan; + + /* Update driver if changes were actually made. */ + if ((old_oper != local->oper_channel) || + (old_oper_type != local->_oper_channel_type)) + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); + + if ((sdata && sdata->vif.type != NL80211_IFTYPE_MONITOR) && + old_vif_oper_type != sdata->vif.bss_conf.channel_type) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_HT); return 0; -- cgit v1.1 From 0fa025f0a29ec6848b67a3021db4248c9dcc78ed Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Fri, 28 Jan 2011 17:05:42 -0800 Subject: mac80211: Show configured channel-type in netdev debugfs. Signed-off-by: Ben Greear Signed-off-by: John W. 
Linville --- net/mac80211/debugfs_netdev.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 872adb8..4cffbf6 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -149,6 +149,7 @@ IEEE80211_IF_FILE(rc_rateidx_mask_5ghz, rc_rateidx_mask[IEEE80211_BAND_5GHZ], HEX); IEEE80211_IF_FILE(flags, flags, HEX); IEEE80211_IF_FILE(state, state, LHEX); +IEEE80211_IF_FILE(channel_type, vif.bss_conf.channel_type, DEC); /* STA attributes */ IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC); @@ -289,6 +290,7 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(drop_unencrypted); DEBUGFS_ADD(flags); DEBUGFS_ADD(state); + DEBUGFS_ADD(channel_type); DEBUGFS_ADD(rc_rateidx_mask_2ghz); DEBUGFS_ADD(rc_rateidx_mask_5ghz); @@ -304,6 +306,7 @@ static void add_ap_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(drop_unencrypted); DEBUGFS_ADD(flags); DEBUGFS_ADD(state); + DEBUGFS_ADD(channel_type); DEBUGFS_ADD(rc_rateidx_mask_2ghz); DEBUGFS_ADD(rc_rateidx_mask_5ghz); @@ -317,6 +320,7 @@ static void add_wds_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(drop_unencrypted); DEBUGFS_ADD(flags); DEBUGFS_ADD(state); + DEBUGFS_ADD(channel_type); DEBUGFS_ADD(rc_rateidx_mask_2ghz); DEBUGFS_ADD(rc_rateidx_mask_5ghz); @@ -328,6 +332,7 @@ static void add_vlan_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(drop_unencrypted); DEBUGFS_ADD(flags); DEBUGFS_ADD(state); + DEBUGFS_ADD(channel_type); DEBUGFS_ADD(rc_rateidx_mask_2ghz); DEBUGFS_ADD(rc_rateidx_mask_5ghz); } @@ -336,6 +341,7 @@ static void add_monitor_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(flags); DEBUGFS_ADD(state); + DEBUGFS_ADD(channel_type); } #ifdef CONFIG_MAC80211_MESH -- cgit v1.1 From 172710bf8305c1b145796e34426c865480884024 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Fri, 28 Jan 2011 17:05:43 -0800 Subject: mac80211: Warn users if HT fails because of freq 
mismatch. I have a netgear WNDR3700 that appears to have an off-by-four bug in how it fills out the hti->control_chan (I configure the AP to channel 11, it reports 15 as control_chan). Poke a message into the kernel logs to give users a clue as to why they are not getting the expected channel-type or rate. Signed-off-by: Ben Greear Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index dfa752e..e059b3a 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -161,6 +161,7 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband; struct sta_info *sta; u32 changed = 0; + int hti_cfreq; u16 ht_opmode; bool enable_ht = true; enum nl80211_channel_type prev_chantype; @@ -174,10 +175,27 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, if (!sband->ht_cap.ht_supported) enable_ht = false; - /* check that channel matches the right operating channel */ - if (local->hw.conf.channel->center_freq != - ieee80211_channel_to_frequency(hti->control_chan, sband->band)) - enable_ht = false; + if (enable_ht) { + hti_cfreq = ieee80211_channel_to_frequency(hti->control_chan, + sband->band); + /* check that channel matches the right operating channel */ + if (local->hw.conf.channel->center_freq != hti_cfreq) { + /* Some APs mess this up, evidently. + * Netgear WNDR3700 sometimes reports 4 higher than + * the actual channel, for instance. + */ + printk(KERN_DEBUG + "%s: Wrong control channel in association" + " response: configured center-freq: %d" + " hti-cfreq: %d hti->control_chan: %d" + " band: %d. 
Disabling HT.\n", + sdata->name, + local->hw.conf.channel->center_freq, + hti_cfreq, hti->control_chan, + sband->band); + enable_ht = false; + } + } if (enable_ht) { channel_type = NL80211_CHAN_HT20; -- cgit v1.1 From bf6a0579f60ae5225280c82cc52b51db1255e7fb Mon Sep 17 00:00:00 2001 From: Juuso Oikarinen Date: Mon, 31 Jan 2011 15:52:58 +0200 Subject: cfg80211: Fix power save state after interface type change Currently cfg80211 only configures the PSM state to the driver upon creation of a new virtual interface, but not after interface type change. The mac80211 on the other hand reinitializes its sdata structure every time the interface type is changed, losing the PSM configuration. Hence, if the interface type is changed to, say, ad-hoc and then back to managed, "iw wlan0 get power_save" will claim that PSM is enabled, when in fact on mac80211 level it is not. Fix this in cfg80211 by configuring the PSM state to the driver each time the interface is brought up instead of just when the interface is created. Signed-off-by: Juuso Oikarinen Signed-off-by: John W. 
Linville --- net/wireless/core.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index e9a5f8c..fe01de2 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -718,13 +718,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, wdev->ps = false; /* allow mac80211 to determine the timeout */ wdev->ps_timeout = -1; - if (rdev->ops->set_power_mgmt) - if (rdev->ops->set_power_mgmt(wdev->wiphy, dev, - wdev->ps, - wdev->ps_timeout)) { - /* assume this means it's off */ - wdev->ps = false; - } if (!dev->ethtool_ops) dev->ethtool_ops = &cfg80211_ethtool_ops; @@ -813,6 +806,19 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, rdev->opencount++; mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); + + /* + * Configure power management to the driver here so that its + * correctly set also after interface type changes etc. + */ + if (wdev->iftype == NL80211_IFTYPE_STATION && + rdev->ops->set_power_mgmt) + if (rdev->ops->set_power_mgmt(wdev->wiphy, dev, + wdev->ps, + wdev->ps_timeout)) { + /* assume this means it's off */ + wdev->ps = false; + } break; case NETDEV_UNREGISTER: /* -- cgit v1.1 From 2cf22b897c63df65e7360a1897e5312c58617fbd Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Mon, 31 Jan 2011 11:30:09 -0800 Subject: mac80211: Recalculate channel-type on iface removal. When a vif goes away, it could cause the super-chan to be recalculated differently, so do that calculation on iface removal. Signed-off-by: Ben Greear Acked-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/iface.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 8acba45..5a4e19b 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -382,6 +382,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, *tmp; u32 hw_reconf_flags = 0; int i; + enum nl80211_channel_type orig_ct; if (local->scan_sdata == sdata) ieee80211_scan_cancel(local); @@ -542,8 +543,14 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, hw_reconf_flags = 0; } + /* Re-calculate channel-type, in case there are multiple vifs + * on different channel types. + */ + orig_ct = local->_oper_channel_type; + ieee80211_set_channel_type(local, NULL, NL80211_CHAN_NO_HT); + /* do after stop to avoid reconfiguring when we stop anyway */ - if (hw_reconf_flags) + if (hw_reconf_flags || (orig_ct != local->_oper_channel_type)) ieee80211_hw_config(local, hw_reconf_flags); spin_lock_irqsave(&local->queue_stop_reason_lock, flags); -- cgit v1.1 From 8fd369eeaa81d05969787c9ddf9cf3f1a8c4e084 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Mon, 31 Jan 2011 22:29:12 +0200 Subject: mac80211: do not calc frame duration when using HW rate-control When rate-control is performed in HW, we cannot calculate frame duration as we do not have the skb transmission rate in SW. ieee80211_tx_h_calculate_duration() should only be called when ieee80211_tx_h_rate_ctrl() has been called before to initialize data in skb->cb. This doesn't happen for drivers with HW rate-control. Fixes the following warning when operating in AP-mode in a driver with HW rate-control. 
WARNING: at net/mac80211/tx.c:57 ieee80211_duration+0x54/0x1d8 [mac80211]() Modules linked in: wl1271_sdio wl1271 firmware_class crc7 mac80211 cfg80211 [] (unwind_backtrace+0x0/0x124) from [] (warn_slowpath_common+0x4c/0x64) [] (warn_slowpath_common+0x4c/0x64) from [] (warn_slowpath_null+0x18/0x1c) [] (warn_slowpath_null+0x18/0x1c) from [] (ieee80211_duration+0x54/0x1d8 [mac80211]) [] (ieee80211_duration+0x54/0x1d8 [mac80211]) from [] (invoke_tx_handlers+0xfa0/0x1088 [mac80211]) [] (invoke_tx_handlers+0xfa0/0x1088 [mac80211]) from [] (ieee80211_tx+0x84/0x248 [mac80211]) [] (ieee80211_tx+0x84/0x248 [mac80211]) from [] (ieee80211_tx_pending+0x12c/0x278 [mac80211]) [] (ieee80211_tx_pending+0x12c/0x278 [mac80211]) from [] (tasklet_action+0x68/0xbc) [] (tasklet_action+0x68/0xbc) from [] (__do_softirq+0x84/0x114) [] (__do_softirq+0x84/0x114) from [] (do_softirq+0x48/0x54) [] (do_softirq+0x48/0x54) from [] (local_bh_enable+0x98/0xcc) [] (local_bh_enable+0x98/0xcc) from [] (wl1271_rx+0x2e8/0x3a4 [wl1271]) [] (wl1271_rx+0x2e8/0x3a4 [wl1271]) from [] (wl1271_irq_work+0x230/0x310 [wl1271]) [] (wl1271_irq_work+0x230/0x310 [wl1271]) from [] (process_one_work+0x208/0x350) [] (process_one_work+0x208/0x350) from [] (worker_thread+0x1cc/0x300) [] (worker_thread+0x1cc/0x300) from [] (kthread+0x84/0x8c) [] (kthread+0x84/0x8c) from [] (kernel_thread_exit+0x0/0x8) Signed-off-by: Arik Nemtsov Signed-off-by: John W. Linville --- net/mac80211/tx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index ffc6749..8fbbc7a 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1394,7 +1394,8 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) /* handlers after fragment must be aware of tx info fragmentation! 
*/ CALL_TXH(ieee80211_tx_h_stats); CALL_TXH(ieee80211_tx_h_encrypt); - CALL_TXH(ieee80211_tx_h_calculate_duration); + if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) + CALL_TXH(ieee80211_tx_h_calculate_duration); #undef CALL_TXH txh_done: -- cgit v1.1 From d057e5a381cbaec5632117bf62ba49438ab16214 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Mon, 31 Jan 2011 22:29:13 +0200 Subject: mac80211: add HW flag for disabling auto link-PS in AP mode When operating in AP mode the wl1271 hardware filters out null-data packets as well as management packets. This makes it impossible for mac80211 to monitor the PS mode by using the PM bit of incoming frames. Implement a HW flag to indicate that mac80211 should ignore the PM bit. In addition, expose ieee80211_sta_ps_transition() to make low-level drivers capable of controlling PS-mode. Signed-off-by: Arik Nemtsov Signed-off-by: John W. Linville --- net/mac80211/rx.c | 27 +++++++++++++++++++++++++-- net/mac80211/sta_info.c | 3 ++- net/mac80211/status.c | 4 ++++ 3 files changed, 31 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 7185c93..d78d6fc 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1105,7 +1105,8 @@ static void ap_sta_ps_start(struct sta_info *sta) atomic_inc(&sdata->bss->num_sta_ps); set_sta_flags(sta, WLAN_STA_PS_STA); - drv_sta_notify(local, sdata, STA_NOTIFY_SLEEP, &sta->sta); + if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS)) + drv_sta_notify(local, sdata, STA_NOTIFY_SLEEP, &sta->sta); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %pM aid %d enters power save mode\n", sdata->name, sta->sta.addr, sta->sta.aid); @@ -1134,6 +1135,27 @@ static void ap_sta_ps_end(struct sta_info *sta) ieee80211_sta_ps_deliver_wakeup(sta); } +int ieee80211_sta_ps_transition(struct ieee80211_sta *sta, bool start) +{ + struct sta_info *sta_inf = container_of(sta, struct sta_info, sta); + bool in_ps; + + 
WARN_ON(!(sta_inf->local->hw.flags & IEEE80211_HW_AP_LINK_PS)); + + /* Don't let the same PS state be set twice */ + in_ps = test_sta_flags(sta_inf, WLAN_STA_PS_STA); + if ((start && in_ps) || (!start && !in_ps)) + return -EINVAL; + + if (start) + ap_sta_ps_start(sta_inf); + else + ap_sta_ps_end(sta_inf); + + return 0; +} +EXPORT_SYMBOL(ieee80211_sta_ps_transition); + static ieee80211_rx_result debug_noinline ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) { @@ -1178,7 +1200,8 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) * Change STA power saving mode only at the end of a frame * exchange sequence. */ - if (!ieee80211_has_morefrags(hdr->frame_control) && + if (!(sta->local->hw.flags & IEEE80211_HW_AP_LINK_PS) && + !ieee80211_has_morefrags(hdr->frame_control) && !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) && (rx->sdata->vif.type == NL80211_IFTYPE_AP || rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) { diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index c426504..5a11078 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -899,7 +899,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) struct ieee80211_local *local = sdata->local; int sent, buffered; - drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta); + if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS)) + drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta); if (!skb_queue_empty(&sta->ps_tx_buf)) sta_info_clear_tim_bit(sta); diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 38a7972..ffb0de9 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -98,6 +98,10 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, * (b) always process RX events before TX status events if ordering * can be unknown, for example with different interrupt status * bits. + * (c) if PS mode transitions are manual (i.e. 
the flag + * %IEEE80211_HW_AP_LINK_PS is set), always process PS state + * changes before calling TX status events if ordering can be + * unknown. */ if (test_sta_flags(sta, WLAN_STA_PS_STA) && skb_queue_len(&sta->tx_filtered) < STA_MAX_TX_BUFFER) { -- cgit v1.1 From 771bbd09f7febb854dd7c30f983aa57535f9e8c9 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Tue, 1 Feb 2011 13:23:05 +0200 Subject: mac80211: pass up beacons from external BSS when operating as AP Beacons from external BSSes are required for updating overlapping BSS info (i.e. ERP protection). Pass them up unconditionally. Signed-off-by: Arik Nemtsov Signed-off-by: John W. Linville --- net/mac80211/rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index d78d6fc..c08b8e9 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2698,7 +2698,8 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, return 0; } else if (!ieee80211_bssid_match(bssid, sdata->vif.addr)) { - if (!(status->rx_flags & IEEE80211_RX_IN_SCAN)) + if (!(status->rx_flags & IEEE80211_RX_IN_SCAN) && + !ieee80211_is_beacon(hdr->frame_control)) return 0; status->rx_flags &= ~IEEE80211_RX_RA_MATCH; } -- cgit v1.1 From e9d7732eafe38a717212648b7615399e68abb551 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 1 Feb 2011 15:35:36 +0100 Subject: mac80211: allow GO to scan like AP There's no point in disallowing scanning for a GO interface when it's not beaconing yet. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/cfg.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 88b0884..845c76d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1287,8 +1287,11 @@ static int ieee80211_scan(struct wiphy *wiphy, case NL80211_IFTYPE_P2P_GO: if (sdata->local->ops->hw_scan) break; - /* FIXME: implement NoA while scanning in software */ - return -EOPNOTSUPP; + /* + * FIXME: implement NoA while scanning in software, + * for now fall through to allow scanning only when + * beaconing hasn't been configured yet + */ case NL80211_IFTYPE_AP: if (sdata->u.ap.beacon) return -EOPNOTSUPP; -- cgit v1.1 From 747d753df7fea1d2d29c5c33623f6d2e5d0ed2d6 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 3 Feb 2011 18:34:28 +0200 Subject: mac80211: Remove obsolete TKIP flexibility The TKIP implementation was originally prepared to be a bit more flexible in the way Michael MIC TX/RX keys are configured. However, we are now taking care of the TX/RX MIC key swapping in user space, so this code will not be needed. Similarly, there were some remaining WPA testing code that won't be used in their current form. Remove the unneeded extra complexity. Signed-off-by: Jouni Malinen Reviewed-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/wpa.c | 32 +++++++------------------------- 1 file changed, 7 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index bee230d..cd5e730 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -26,13 +26,12 @@ ieee80211_tx_result ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) { - u8 *data, *key, *mic, key_offset; + u8 *data, *key, *mic; size_t data_len; unsigned int hdrlen; struct ieee80211_hdr *hdr; struct sk_buff *skb = tx->skb; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - int authenticator; int tail; hdr = (struct ieee80211_hdr *)skb->data; @@ -62,15 +61,7 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) skb_headroom(skb) < TKIP_IV_LEN)) return TX_DROP; -#if 0 - authenticator = fc & IEEE80211_FCTL_FROMDS; /* FIX */ -#else - authenticator = 1; -#endif - key_offset = authenticator ? - NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY : - NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY; - key = &tx->key->conf.key[key_offset]; + key = &tx->key->conf.key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY]; mic = skb_put(skb, MICHAEL_MIC_LEN); michael_mic(key, hdr, data, data_len, mic); @@ -81,14 +72,13 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) ieee80211_rx_result ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) { - u8 *data, *key = NULL, key_offset; + u8 *data, *key = NULL; size_t data_len; unsigned int hdrlen; u8 mic[MICHAEL_MIC_LEN]; struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - int authenticator = 1, wpa_test = 0; /* No way to verify the MIC if the hardware stripped it */ if (status->flag & RX_FLAG_MMIC_STRIPPED) @@ -106,17 +96,9 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) data = skb->data + hdrlen; data_len = skb->len - hdrlen - MICHAEL_MIC_LEN; -#if 0 - authenticator = fc & IEEE80211_FCTL_TODS; /* FIX */ -#else - 
authenticator = 1; -#endif - key_offset = authenticator ? - NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY : - NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY; - key = &rx->key->conf.key[key_offset]; + key = &rx->key->conf.key[NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY]; michael_mic(key, hdr, data, data_len, mic); - if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0 || wpa_test) { + if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0) { if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) return RX_DROP_UNUSABLE; @@ -208,7 +190,7 @@ ieee80211_rx_result ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; - int hdrlen, res, hwaccel = 0, wpa_test = 0; + int hdrlen, res, hwaccel = 0; struct ieee80211_key *key = rx->key; struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); @@ -235,7 +217,7 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) hdr->addr1, hwaccel, rx->queue, &rx->tkip_iv32, &rx->tkip_iv16); - if (res != TKIP_DECRYPT_OK || wpa_test) + if (res != TKIP_DECRYPT_OK) return RX_DROP_UNUSABLE; /* Trim ICV */ -- cgit v1.1 From 681d119047761cc59a15c0bb86891f3a878997cf Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 3 Feb 2011 18:35:19 +0200 Subject: mac80211: Add testing functionality for TKIP TKIP countermeasures depend on devices being able to detect Michael MIC failures on received frames and for stations to report errors to the AP. In order to test that behavior, it is useful to be able to send out TKIP frames with incorrect Michael MIC. This testing behavior has minimal effect on the TX path, so it can be added to mac80211 for convenient use. The interface for using this functionality is a file in mac80211 netdev debugfs (tkip_mic_test). Writing a MAC address to the file makes mac80211 generate a dummy data frame that will be sent out using invalid Michael MIC value. 
In AP mode, the address needs to be for one of the associated stations or ff:ff:ff:ff:ff:ff to use a broadcast frame. In station mode, the address can be anything, e.g., the current BSSID. It should be noted that this functionality works correctly only when associated and using TKIP. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/debugfs_netdev.c | 102 +++++++++++++++++++++++++++++++++++++++++- net/mac80211/wpa.c | 7 +++ 2 files changed, 108 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 4cffbf6..dacace6 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -36,7 +36,7 @@ static ssize_t ieee80211_if_read( ret = (*format)(sdata, buf, sizeof(buf)); read_unlock(&dev_base_lock); - if (ret != -EINVAL) + if (ret >= 0) ret = simple_read_from_buffer(userbuf, count, ppos, buf, ret); return ret; @@ -221,6 +221,104 @@ static ssize_t ieee80211_if_parse_smps(struct ieee80211_sub_if_data *sdata, __IEEE80211_IF_FILE_W(smps); +static ssize_t ieee80211_if_fmt_tkip_mic_test( + const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) +{ + return -EOPNOTSUPP; +} + +static int hwaddr_aton(const char *txt, u8 *addr) +{ + int i; + + for (i = 0; i < ETH_ALEN; i++) { + int a, b; + + a = hex_to_bin(*txt++); + if (a < 0) + return -1; + b = hex_to_bin(*txt++); + if (b < 0) + return -1; + *addr++ = (a << 4) | b; + if (i < 5 && *txt++ != ':') + return -1; + } + + return 0; +} + +static ssize_t ieee80211_if_parse_tkip_mic_test( + struct ieee80211_sub_if_data *sdata, const char *buf, int buflen) +{ + struct ieee80211_local *local = sdata->local; + u8 addr[ETH_ALEN]; + struct sk_buff *skb; + struct ieee80211_hdr *hdr; + __le16 fc; + + /* + * Assume colon-delimited MAC address with possible white space + * following. 
+ */ + if (buflen < 3 * ETH_ALEN - 1) + return -EINVAL; + if (hwaddr_aton(buf, addr) < 0) + return -EINVAL; + + if (!ieee80211_sdata_running(sdata)) + return -ENOTCONN; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24 + 100); + if (!skb) + return -ENOMEM; + skb_reserve(skb, local->hw.extra_tx_headroom); + + hdr = (struct ieee80211_hdr *) skb_put(skb, 24); + memset(hdr, 0, 24); + fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA); + + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP: + fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); + /* DA BSSID SA */ + memcpy(hdr->addr1, addr, ETH_ALEN); + memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); + memcpy(hdr->addr3, sdata->vif.addr, ETH_ALEN); + break; + case NL80211_IFTYPE_STATION: + fc |= cpu_to_le16(IEEE80211_FCTL_TODS); + /* BSSID SA DA */ + if (sdata->vif.bss_conf.bssid == NULL) { + dev_kfree_skb(skb); + return -ENOTCONN; + } + memcpy(hdr->addr1, sdata->vif.bss_conf.bssid, ETH_ALEN); + memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); + memcpy(hdr->addr3, addr, ETH_ALEN); + break; + default: + dev_kfree_skb(skb); + return -EOPNOTSUPP; + } + hdr->frame_control = fc; + + /* + * Add some length to the test frame to make it look bit more valid. + * The exact contents does not matter since the recipient is required + * to drop this because of the Michael MIC failure. 
+ */ + memset(skb_put(skb, 50), 0, 50); + + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_TKIP_MIC_FAILURE; + + ieee80211_tx_skb(sdata, skb); + + return buflen; +} + +__IEEE80211_IF_FILE_W(tkip_mic_test); + /* AP attributes */ IEEE80211_IF_FILE(num_sta_ps, u.ap.num_sta_ps, ATOMIC); IEEE80211_IF_FILE(dtim_count, u.ap.dtim_count, DEC); @@ -299,6 +397,7 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(last_beacon); DEBUGFS_ADD(ave_beacon); DEBUGFS_ADD_MODE(smps, 0600); + DEBUGFS_ADD_MODE(tkip_mic_test, 0200); } static void add_ap_files(struct ieee80211_sub_if_data *sdata) @@ -313,6 +412,7 @@ static void add_ap_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(num_sta_ps); DEBUGFS_ADD(dtim_count); DEBUGFS_ADD(num_buffered_multicast); + DEBUGFS_ADD_MODE(tkip_mic_test, 0200); } static void add_wds_files(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index cd5e730..f1765de 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -46,6 +46,11 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) data = skb->data + hdrlen; data_len = skb->len - hdrlen; + if (unlikely(info->flags & IEEE80211_TX_INTFL_TKIP_MIC_FAILURE)) { + /* Need to use software crypto for the test */ + info->control.hw_key = NULL; + } + if (info->control.hw_key && !(tx->flags & IEEE80211_TX_FRAGMENTED) && !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) { @@ -64,6 +69,8 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) key = &tx->key->conf.key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY]; mic = skb_put(skb, MICHAEL_MIC_LEN); michael_mic(key, hdr, data, data_len, mic); + if (unlikely(info->flags & IEEE80211_TX_INTFL_TKIP_MIC_FAILURE)) + mic[0]++; return TX_CONTINUE; } -- cgit v1.1 From 512119b36f7945a650877cbc7e9b5f4cc4d92e4c Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Mon, 31 Jan 2011 20:48:44 +0200 Subject: mac80211: fix race between next beacon dtim and ieee80211_get_buffered_bc On 
review of 'zd1211rw: implement beacon fetching and handling ieee80211_get_buffered_bc()', Christian Lamparter noted that [1]: Since zd_beacon_done also uploads the next beacon so long in advance, there could be an equally long race between the outdated state of the next beacon's DTIM broadcast traffic indicator (802.11-2007 7.3.2.6) which -in your case- was uploaded almost a beacon interval ago and the xmit of ieee80211_get_buffered_bc *now*. The dtim bc/mc bit might be not set, when a mc/bc arrived after the beacon was uploaded, but before the "beacon done event" from the hardware. So, dozing stations don't expect the broadcast traffic and of course, they might miss it completely. It's probably better to fix this in mac80211 (see the attached hack). [1] http://marc.info/?l=linux-wireless&m=129435041117256&w=2 CC: Christian Lamparter Signed-off-by: Jussi Kivilinna Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 1 + net/mac80211/tx.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c47d7c0..f71ed31 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -225,6 +225,7 @@ struct ieee80211_if_ap { struct sk_buff_head ps_bc_buf; atomic_t num_sta_ps; /* number of stations in PS mode */ int dtim_count; + bool dtim_bc_mc; }; struct ieee80211_if_wds { diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 8fbbc7a..bf67a22 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2179,6 +2179,8 @@ static void ieee80211_beacon_add_tim(struct ieee80211_if_ap *bss, if (bss->dtim_count == 0 && !skb_queue_empty(&bss->ps_bc_buf)) aid0 = 1; + bss->dtim_bc_mc = aid0 == 1; + if (have_bits) { /* Find largest even number N1 so that bits numbered 1 through * (N1 x 8) - 1 in the bitmap are 0 and number N2 so that bits @@ -2549,7 +2551,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, if (sdata->vif.type != NL80211_IFTYPE_AP || !beacon || 
!beacon->head) goto out; - if (bss->dtim_count != 0) + if (bss->dtim_count != 0 || !bss->dtim_bc_mc) goto out; /* send buffered bc/mc only after DTIM beacon */ while (1) { -- cgit v1.1 From 8c99f69182fb9550ceedf599b32af335e743367b Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Wed, 2 Feb 2011 22:57:53 +0530 Subject: mac80211: do not restart ps timer during scan or offchannel While leaving oper channel, STA informs sleep state to AP to stop sending data. Till sending ack for the nullfunc, AP continues to send the data to STA which restarts ps_timer that is causing unnecessary nullfunc exchange on timer expiry when the STA was already moved to offchannel. So don't restart ps_timer on data reception during scan. This issue was identified by the following warning. WARNING: at net/mac80211/tx.c:661 invoke_tx_handlers+0xf07/0x1330 [mac80211] wlan0: Dropped data frame as no usable bitrate found while scanning and associated. Target station: 00:03:7f:0b:a6:1b on 5 GHz band Call Trace: [] invoke_tx_handlers+0xf07/0x1330 [mac80211] [] ieee80211_tx+0x86/0x2c0 [mac80211] [] ieee80211_xmit+0xb5/0x1d0 [mac80211] [] ieee80211_dynamic_ps_enable_work+0x0/0xb0 [mac80211] [] ieee80211_tx_skb+0x4f/0x60 [mac80211] [] ieee80211_send_nullfunc+0x46/0x60 [mac80211] [] ieee80211_dynamic_ps_enable_work+0xa5/0xb0 [mac80211] Reviewed-by: Johannes Berg Signed-off-by: Rajkumar Manoharan Signed-off-by: John W. 
Linville --- net/mac80211/rx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index c08b8e9..b37c341 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1952,7 +1952,10 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) dev->stats.rx_bytes += rx->skb->len; if (local->ps_sdata && local->hw.conf.dynamic_ps_timeout > 0 && - !is_multicast_ether_addr(((struct ethhdr *)rx->skb->data)->h_dest)) { + !is_multicast_ether_addr( + ((struct ethhdr *)rx->skb->data)->h_dest) && + (!local->scanning && + !test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state))) { mod_timer(&local->dynamic_ps_timer, jiffies + msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); } -- cgit v1.1 From b1f93314bfc4d5753391616735f6b8df96db901d Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 4 Feb 2011 19:20:08 +0100 Subject: mac80211: do not send duplicate data frames to the cooked monitor interface I can't think of a valid use case for this aside from debugging (which can also be done with a real monitor interface), and dropping these frames saves some precious CPU cycles. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index b37c341..753ffc4 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -815,7 +815,7 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) rx->local->dot11FrameDuplicateCount++; rx->sta->num_duplicates++; } - return RX_DROP_MONITOR; + return RX_DROP_UNUSABLE; } else rx->sta->last_seq_ctrl[rx->queue] = hdr->seq_ctrl; } -- cgit v1.1 From b23b025fe246f3acc2988eb6d400df34c27cb8ae Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Fri, 4 Feb 2011 11:54:17 -0800 Subject: mac80211: Optimize scans on current operating channel. 
This should decrease unnecessary flushes, on/off channel work, and channel changes in cases where the only scanned channel is the current operating channel. * Removes SCAN_OFF_CHANNEL flag, uses SDATA_STATE_OFFCHANNEL and is-scanning flags instead. * Add helper method to determine if we are currently configured for the operating channel. * Do not blindly go off/on channel in work.c. Instead, only call appropriate on/off code when we really need to change channels. Always enable offchannel-ps mode when starting work, and disable it when we are done. * Consolidate ieee80211_offchannel_stop_station and ieee80211_offchannel_stop_beaconing, call it ieee80211_offchannel_stop_vifs instead. * Accept non-beacon frames when scanning on operating channel. * Scan state machine optimized to minimize on/off channel transitions. Also, when going on-channel, go ahead and re-enable beaconing. We're going to be there for 200ms, so seems like some useful beaconing could happen. Always enable offchannel-ps mode when starting software scan, and disable it when we are done. * Grab local->mtx earlier in __ieee80211_scan_completed_finish so that we are protected when calling hw_config(), etc. * Pass probe-responses up the stack if scanning on local channel, so that mlme can take a look. Signed-off-by: Ben Greear Signed-off-by: John W.
Linville --- net/mac80211/ieee80211_i.h | 13 +++---- net/mac80211/main.c | 53 ++++++++++++++++++++++++---- net/mac80211/offchannel.c | 68 +++++++++++++++++++---------------- net/mac80211/rx.c | 12 ++----- net/mac80211/scan.c | 88 +++++++++++++++++++++++++++++++++------------- net/mac80211/tx.c | 3 +- net/mac80211/work.c | 66 ++++++++++++++++++++++++++++------ 7 files changed, 214 insertions(+), 89 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f71ed31..44eea1a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -655,8 +655,6 @@ struct tpt_led_trigger { * well be on the operating channel * @SCAN_HW_SCANNING: The hardware is scanning for us, we have no way to * determine if we are on the operating channel or not - * @SCAN_OFF_CHANNEL: We're off our operating channel for scanning, - * gets only set in conjunction with SCAN_SW_SCANNING * @SCAN_COMPLETED: Set for our scan work function when the driver reported * that the scan completed. 
* @SCAN_ABORTED: Set for our scan work function when the driver reported @@ -665,7 +663,6 @@ struct tpt_led_trigger { enum { SCAN_SW_SCANNING, SCAN_HW_SCANNING, - SCAN_OFF_CHANNEL, SCAN_COMPLETED, SCAN_ABORTED, }; @@ -1148,10 +1145,14 @@ void ieee80211_rx_bss_put(struct ieee80211_local *local, struct ieee80211_bss *bss); /* off-channel helpers */ -void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local); -void ieee80211_offchannel_stop_station(struct ieee80211_local *local); +bool ieee80211_cfg_on_oper_channel(struct ieee80211_local *local); +void ieee80211_offchannel_enable_all_ps(struct ieee80211_local *local, + bool tell_ap); +void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local, + bool offchannel_ps_enable); void ieee80211_offchannel_return(struct ieee80211_local *local, - bool enable_beaconing); + bool enable_beaconing, + bool offchannel_ps_disable); void ieee80211_hw_roc_setup(struct ieee80211_local *local); /* interface handling */ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 09a2744..c155c0b 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -98,6 +98,41 @@ static void ieee80211_reconfig_filter(struct work_struct *work) ieee80211_configure_filter(local); } +/* + * Returns true if we are logically configured to be on + * the operating channel AND the hardware-conf is currently + * configured on the operating channel. Compares channel-type + * as well. 
+ */ +bool ieee80211_cfg_on_oper_channel(struct ieee80211_local *local) +{ + struct ieee80211_channel *chan, *scan_chan; + enum nl80211_channel_type channel_type; + + /* This logic needs to match logic in ieee80211_hw_config */ + if (local->scan_channel) { + chan = local->scan_channel; + channel_type = NL80211_CHAN_NO_HT; + } else if (local->tmp_channel) { + chan = scan_chan = local->tmp_channel; + channel_type = local->tmp_channel_type; + } else { + chan = local->oper_channel; + channel_type = local->_oper_channel_type; + } + + if (chan != local->oper_channel || + channel_type != local->_oper_channel_type) + return false; + + /* Check current hardware-config against oper_channel. */ + if ((local->oper_channel != local->hw.conf.channel) || + (local->_oper_channel_type != local->hw.conf.channel_type)) + return false; + + return true; +} + int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) { struct ieee80211_channel *chan, *scan_chan; @@ -110,21 +145,27 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) scan_chan = local->scan_channel; + /* If this off-channel logic ever changes, ieee80211_on_oper_channel + * may need to change as well. 
+ */ offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; if (scan_chan) { chan = scan_chan; channel_type = NL80211_CHAN_NO_HT; - local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; - } else if (local->tmp_channel && - local->oper_channel != local->tmp_channel) { + } else if (local->tmp_channel) { chan = scan_chan = local->tmp_channel; channel_type = local->tmp_channel_type; - local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; } else { chan = local->oper_channel; channel_type = local->_oper_channel_type; - local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL; } + + if (chan != local->oper_channel || + channel_type != local->_oper_channel_type) + local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; + else + local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL; + offchannel_flag ^= local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; if (offchannel_flag || chan != local->hw.conf.channel || @@ -231,7 +272,7 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, if (changed & BSS_CHANGED_BEACON_ENABLED) { if (local->quiescing || !ieee80211_sdata_running(sdata) || - test_bit(SCAN_SW_SCANNING, &local->scanning)) { + test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state)) { sdata->vif.bss_conf.enable_beacon = false; } else { /* diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index b4e5267..13427b1 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -17,10 +17,14 @@ #include "driver-trace.h" /* - * inform AP that we will go to sleep so that it will buffer the frames - * while we scan + * Tell our hardware to disable PS. + * Optionally inform AP that we will go to sleep so that it will buffer + * the frames while we are doing off-channel work. This is optional + * because we *may* be doing work on-operating channel, and want our + * hardware unconditionally awake, but still let the AP send us normal frames. 
*/ -static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata) +static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata, + bool tell_ap) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -41,8 +45,8 @@ static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); } - if (!(local->offchannel_ps_enabled) || - !(local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) + if (tell_ap && (!local->offchannel_ps_enabled || + !(local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK))) /* * If power save was enabled, no need to send a nullfunc * frame because AP knows that we are sleeping. But if the @@ -77,6 +81,9 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata) * we are sleeping, let's just enable power save mode in * hardware. */ + /* TODO: Only set hardware if CONF_PS changed? + * TODO: Should we set offchannel_ps_enabled to false? + */ local->hw.conf.flags |= IEEE80211_CONF_PS; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); } else if (local->hw.conf.dynamic_ps_timeout > 0) { @@ -95,63 +102,61 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata) ieee80211_sta_reset_conn_monitor(sdata); } -void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local) +void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local, + bool offchannel_ps_enable) { struct ieee80211_sub_if_data *sdata; + /* + * notify the AP about us leaving the channel and stop all + * STA interfaces. + */ mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) continue; - /* disable beaconing */ + if (sdata->vif.type != NL80211_IFTYPE_MONITOR) + set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); + + /* Check to see if we should disable beaconing. 
*/ if (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_ADHOC || sdata->vif.type == NL80211_IFTYPE_MESH_POINT) ieee80211_bss_info_change_notify( sdata, BSS_CHANGED_BEACON_ENABLED); - /* - * only handle non-STA interfaces here, STA interfaces - * are handled in ieee80211_offchannel_stop_station(), - * e.g., from the background scan state machine. - * - * In addition, do not stop monitor interface to allow it to be - * used from user space controlled off-channel operations. - */ - if (sdata->vif.type != NL80211_IFTYPE_STATION && - sdata->vif.type != NL80211_IFTYPE_MONITOR) { - set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); + if (sdata->vif.type != NL80211_IFTYPE_MONITOR) { netif_tx_stop_all_queues(sdata->dev); + if (offchannel_ps_enable && + (sdata->vif.type == NL80211_IFTYPE_STATION) && + sdata->u.mgd.associated) + ieee80211_offchannel_ps_enable(sdata, true); } } mutex_unlock(&local->iflist_mtx); } -void ieee80211_offchannel_stop_station(struct ieee80211_local *local) +void ieee80211_offchannel_enable_all_ps(struct ieee80211_local *local, + bool tell_ap) { struct ieee80211_sub_if_data *sdata; - /* - * notify the AP about us leaving the channel and stop all STA interfaces - */ mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) continue; - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); - netif_tx_stop_all_queues(sdata->dev); - if (sdata->u.mgd.associated) - ieee80211_offchannel_ps_enable(sdata); - } + if (sdata->vif.type == NL80211_IFTYPE_STATION && + sdata->u.mgd.associated) + ieee80211_offchannel_ps_enable(sdata, tell_ap); } mutex_unlock(&local->iflist_mtx); } void ieee80211_offchannel_return(struct ieee80211_local *local, - bool enable_beaconing) + bool enable_beaconing, + bool offchannel_ps_disable) { struct ieee80211_sub_if_data *sdata; @@ -161,7 +166,8 @@ void ieee80211_offchannel_return(struct ieee80211_local 
*local, continue; /* Tell AP we're back */ - if (sdata->vif.type == NL80211_IFTYPE_STATION) { + if (offchannel_ps_disable && + sdata->vif.type == NL80211_IFTYPE_STATION) { if (sdata->u.mgd.associated) ieee80211_offchannel_ps_disable(sdata); } @@ -181,7 +187,7 @@ void ieee80211_offchannel_return(struct ieee80211_local *local, netif_tx_wake_all_queues(sdata->dev); } - /* re-enable beaconing */ + /* Check to see if we should re-enable beaconing */ if (enable_beaconing && (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_ADHOC || diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 753ffc4..b5f59ed 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -409,16 +409,10 @@ ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx) if (likely(!(status->rx_flags & IEEE80211_RX_IN_SCAN))) return RX_CONTINUE; - if (test_bit(SCAN_HW_SCANNING, &local->scanning)) + if (test_bit(SCAN_HW_SCANNING, &local->scanning) || + test_bit(SCAN_SW_SCANNING, &local->scanning)) return ieee80211_scan_rx(rx->sdata, skb); - if (test_bit(SCAN_SW_SCANNING, &local->scanning)) { - /* drop all the other packets during a software scan anyway */ - if (ieee80211_scan_rx(rx->sdata, skb) != RX_QUEUED) - dev_kfree_skb(skb); - return RX_QUEUED; - } - /* scanning finished during invoking of handlers */ I802_DEBUG_INC(local->rx_handlers_drop_passive_scan); return RX_DROP_UNUSABLE; @@ -2793,7 +2787,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, local->dot11ReceivedFragmentCount++; if (unlikely(test_bit(SCAN_HW_SCANNING, &local->scanning) || - test_bit(SCAN_OFF_CHANNEL, &local->scanning))) + test_bit(SCAN_SW_SCANNING, &local->scanning))) status->rx_flags |= IEEE80211_RX_IN_SCAN; if (ieee80211_is_mgmt(fc)) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 1ef73be..0ea6ada 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -212,6 +212,14 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) if (bss) 
ieee80211_rx_bss_put(sdata->local, bss); + /* If we are on-operating-channel, and this packet is for the + * current channel, pass the pkt on up the stack so that + * the rest of the stack can make use of it. + */ + if (ieee80211_cfg_on_oper_channel(sdata->local) + && (channel == sdata->local->oper_channel)) + return RX_CONTINUE; + dev_kfree_skb(skb); return RX_QUEUED; } @@ -293,15 +301,31 @@ static void __ieee80211_scan_completed_finish(struct ieee80211_hw *hw, bool was_hw_scan) { struct ieee80211_local *local = hw_to_local(hw); + bool on_oper_chan; + bool enable_beacons = false; + + mutex_lock(&local->mtx); + on_oper_chan = ieee80211_cfg_on_oper_channel(local); + + if (was_hw_scan || !on_oper_chan) { + if (WARN_ON(local->scan_channel)) + local->scan_channel = NULL; + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); + } - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); if (!was_hw_scan) { + bool on_oper_chan2; ieee80211_configure_filter(local); drv_sw_scan_complete(local); - ieee80211_offchannel_return(local, true); + on_oper_chan2 = ieee80211_cfg_on_oper_channel(local); + /* We should always be on-channel at this point. */ + WARN_ON(!on_oper_chan2); + if (on_oper_chan2 && (on_oper_chan != on_oper_chan2)) + enable_beacons = true; + + ieee80211_offchannel_return(local, enable_beacons, true); } - mutex_lock(&local->mtx); ieee80211_recalc_idle(local); mutex_unlock(&local->mtx); @@ -341,13 +365,15 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) */ drv_sw_scan_start(local); - ieee80211_offchannel_stop_beaconing(local); - local->leave_oper_channel_time = 0; local->next_scan_state = SCAN_DECISION; local->scan_channel_idx = 0; - drv_flush(local, false); + /* We always want to use off-channel PS, even if we + * are not really leaving oper-channel. Don't + * tell the AP though, as long as we are on-channel. 
+ */ + ieee80211_offchannel_enable_all_ps(local, false); ieee80211_configure_filter(local); @@ -487,7 +513,21 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, } mutex_unlock(&local->iflist_mtx); - if (local->scan_channel) { + next_chan = local->scan_req->channels[local->scan_channel_idx]; + + if (ieee80211_cfg_on_oper_channel(local)) { + /* We're currently on operating channel. */ + if ((next_chan == local->oper_channel) && + (local->_oper_channel_type == NL80211_CHAN_NO_HT)) + /* We don't need to move off of operating channel. */ + local->next_scan_state = SCAN_SET_CHANNEL; + else + /* + * We do need to leave operating channel, as next + * scan is somewhere else. + */ + local->next_scan_state = SCAN_LEAVE_OPER_CHANNEL; + } else { /* * we're currently scanning a different channel, let's * see if we can scan another channel without interfering @@ -503,7 +543,6 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, * * Otherwise switch back to the operating channel. */ - next_chan = local->scan_req->channels[local->scan_channel_idx]; bad_latency = time_after(jiffies + ieee80211_scan_get_channel_time(next_chan), @@ -521,12 +560,6 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, local->next_scan_state = SCAN_ENTER_OPER_CHANNEL; else local->next_scan_state = SCAN_SET_CHANNEL; - } else { - /* - * we're on the operating channel currently, let's - * leave that channel now to scan another one - */ - local->next_scan_state = SCAN_LEAVE_OPER_CHANNEL; } *next_delay = 0; @@ -535,9 +568,10 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, static void ieee80211_scan_state_leave_oper_channel(struct ieee80211_local *local, unsigned long *next_delay) { - ieee80211_offchannel_stop_station(local); - - __set_bit(SCAN_OFF_CHANNEL, &local->scanning); + /* PS will already be in off-channel mode, + * we do that once at the beginning of scanning. 
+ */ + ieee80211_offchannel_stop_vifs(local, false); /* * What if the nullfunc frames didn't arrive? @@ -560,15 +594,15 @@ static void ieee80211_scan_state_enter_oper_channel(struct ieee80211_local *loca { /* switch back to the operating channel */ local->scan_channel = NULL; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); + if (!ieee80211_cfg_on_oper_channel(local)) + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); /* - * Only re-enable station mode interface now; beaconing will be - * re-enabled once the full scan has been completed. + * Re-enable vifs and beaconing. Leave PS + * in off-channel state..will put that back + * on-channel at the end of scanning. */ - ieee80211_offchannel_return(local, false); - - __clear_bit(SCAN_OFF_CHANNEL, &local->scanning); + ieee80211_offchannel_return(local, true, false); *next_delay = HZ / 5; local->next_scan_state = SCAN_DECISION; @@ -584,8 +618,12 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local, chan = local->scan_req->channels[local->scan_channel_idx]; local->scan_channel = chan; - if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL)) - skip = 1; + + /* Only call hw-config if we really need to change channels. 
*/ + if ((chan != local->hw.conf.channel) || + (local->hw.conf.channel_type != NL80211_CHAN_NO_HT)) + if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL)) + skip = 1; /* advance state machine to next channel/band */ local->scan_channel_idx++; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index bf67a22..2915168 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -257,7 +257,8 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) if (unlikely(info->flags & IEEE80211_TX_CTL_INJECTED)) return TX_CONTINUE; - if (unlikely(test_bit(SCAN_OFF_CHANNEL, &tx->local->scanning)) && + if (unlikely(test_bit(SCAN_SW_SCANNING, &tx->local->scanning)) && + test_bit(SDATA_STATE_OFFCHANNEL, &tx->sdata->state) && !ieee80211_is_probe_req(hdr->frame_control) && !ieee80211_is_nullfunc(hdr->frame_control)) /* diff --git a/net/mac80211/work.c b/net/mac80211/work.c index 36305e0..6bf787a 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -924,18 +924,44 @@ static void ieee80211_work_work(struct work_struct *work) } if (!started && !local->tmp_channel) { - /* - * TODO: could optimize this by leaving the - * station vifs in awake mode if they - * happen to be on the same channel as - * the requested channel - */ - ieee80211_offchannel_stop_beaconing(local); - ieee80211_offchannel_stop_station(local); + bool on_oper_chan; + bool tmp_chan_changed = false; + bool on_oper_chan2; + on_oper_chan = ieee80211_cfg_on_oper_channel(local); + if (local->tmp_channel) + if ((local->tmp_channel != wk->chan) || + (local->tmp_channel_type != wk->chan_type)) + tmp_chan_changed = true; local->tmp_channel = wk->chan; local->tmp_channel_type = wk->chan_type; - ieee80211_hw_config(local, 0); + /* + * Leave the station vifs in awake mode if they + * happen to be on the same channel as + * the requested channel. 
+ */ + on_oper_chan2 = ieee80211_cfg_on_oper_channel(local); + if (on_oper_chan != on_oper_chan2) { + if (on_oper_chan2) { + /* going off oper channel, PS too */ + ieee80211_offchannel_stop_vifs(local, + true); + ieee80211_hw_config(local, 0); + } else { + /* going on channel, but leave PS + * off-channel. */ + ieee80211_hw_config(local, 0); + ieee80211_offchannel_return(local, + true, + false); + } + } else if (tmp_chan_changed) + /* Still off-channel, but on some other + * channel, so update hardware. + * PS should already be off-channel. + */ + ieee80211_hw_config(local, 0); + started = true; wk->timeout = jiffies; } @@ -1011,9 +1037,27 @@ static void ieee80211_work_work(struct work_struct *work) } if (!remain_off_channel && local->tmp_channel) { + bool on_oper_chan = ieee80211_cfg_on_oper_channel(local); local->tmp_channel = NULL; - ieee80211_hw_config(local, 0); - ieee80211_offchannel_return(local, true); + /* If tmp_channel wasn't operating channel, then + * we need to go back on-channel. + * NOTE: If we can ever be here while scannning, + * or if the hw_config() channel config logic changes, + * then we may need to do a more thorough check to see if + * we still need to do a hardware config. Currently, + * we cannot be here while scanning, however. + */ + if (ieee80211_cfg_on_oper_channel(local) && !on_oper_chan) + ieee80211_hw_config(local, 0); + + /* At the least, we need to disable offchannel_ps, + * so just go ahead and run the entire offchannel + * return logic here. We *could* skip enabling + * beaconing if we were already on-oper-channel + * as a future optimization. + */ + ieee80211_offchannel_return(local, true, true); + /* give connection some time to breathe */ run_again(local, jiffies + HZ/2); } -- cgit v1.1 From 0131ba451e20239c5dc701027c1a2edef95e1a6e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 4 Feb 2011 14:37:30 -0800 Subject: ipv4: Don't miss existing cached metrics in new routes. 
Always lookup to see if we have an existing inetpeer entry for a route. Let FLOWI_FLAG_PRECOW_METRICS merely influence the "create" argument to rt_bind_peer(). Also, call rt_bind_peer() unconditionally since it is not possible for rt->peer to be non-NULL at this point. Signed-off-by: David S. Miller --- net/ipv4/route.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e4c8165..0ba6a38 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1859,25 +1859,28 @@ static unsigned int ipv4_default_mtu(const struct dst_entry *dst) static void rt_init_metrics(struct rtable *rt, struct fib_info *fi) { - if (!(rt->fl.flags & FLOWI_FLAG_PRECOW_METRICS)) { - no_cow: - if (fi->fib_metrics != (u32 *) dst_default_metrics) { - rt->fi = fi; - atomic_inc(&fi->fib_clntref); - } - dst_init_metrics(&rt->dst, fi->fib_metrics, true); - } else { - struct inet_peer *peer; + struct inet_peer *peer; + int create = 0; - if (!rt->peer) - rt_bind_peer(rt, 1); - peer = rt->peer; - if (!peer) - goto no_cow; + /* If a peer entry exists for this destination, we must hook + * it up in order to get at cached metrics. + */ + if (rt->fl.flags & FLOWI_FLAG_PRECOW_METRICS) + create = 1; + + rt_bind_peer(rt, create); + peer = rt->peer; + if (peer) { if (inet_metrics_new(peer)) memcpy(peer->metrics, fi->fib_metrics, sizeof(u32) * RTAX_MAX); dst_init_metrics(&rt->dst, peer->metrics, false); + } else { + if (fi->fib_metrics != (u32 *) dst_default_metrics) { + rt->fi = fi; + atomic_inc(&fi->fib_clntref); + } + dst_init_metrics(&rt->dst, fi->fib_metrics, true); } } -- cgit v1.1 From 92d8682926342d2b6aa5b2ecc02221e00e1573a0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 4 Feb 2011 15:55:25 -0800 Subject: inetpeer: Move ICMP rate limiting state into inet_peer entries. Like metrics, the ICMP rate limiting bits are cached state about a destination. So move it into the inet_peer entries. 
If an inet_peer cannot be bound (the reason is memory allocation failure or similar), the policy is to allow. Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 49 ++++++++------------------------------------ net/ipv4/inetpeer.c | 43 +++++++++++++++++++++++++++++++++++++++ net/ipv4/route.c | 56 ++++++++++++++++++++++++++++++++++----------------- net/ipv6/icmp.c | 16 ++++++++------- net/ipv6/ip6_output.c | 5 ++++- net/ipv6/ndisc.c | 4 +++- 6 files changed, 105 insertions(+), 68 deletions(-) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 4aa1b7f..ad2bcf1 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -233,48 +233,11 @@ static inline void icmp_xmit_unlock(struct sock *sk) * Send an ICMP frame. */ -/* - * Check transmit rate limitation for given message. - * The rate information is held in the destination cache now. - * This function is generic and could be used for other purposes - * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov. - * - * Note that the same dst_entry fields are modified by functions in - * route.c too, but these work for packet destinations while xrlim_allow - * works for icmp destinations. This means the rate limiting information - * for one "ip object" is shared - and these ICMPs are twice limited: - * by source and by destination. - * - * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate - * SHOULD allow setting of rate limits - * - * Shared between ICMPv4 and ICMPv6. 
- */ -#define XRLIM_BURST_FACTOR 6 -int xrlim_allow(struct dst_entry *dst, int timeout) -{ - unsigned long now, token = dst->rate_tokens; - int rc = 0; - - now = jiffies; - token += now - dst->rate_last; - dst->rate_last = now; - if (token > XRLIM_BURST_FACTOR * timeout) - token = XRLIM_BURST_FACTOR * timeout; - if (token >= timeout) { - token -= timeout; - rc = 1; - } - dst->rate_tokens = token; - return rc; -} -EXPORT_SYMBOL(xrlim_allow); - -static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt, +static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, int type, int code) { struct dst_entry *dst = &rt->dst; - int rc = 1; + bool rc = true; if (type > NR_ICMP_TYPES) goto out; @@ -288,8 +251,12 @@ static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt, goto out; /* Limit if icmp type is enabled in ratemask. */ - if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) - rc = xrlim_allow(dst, net->ipv4.sysctl_icmp_ratelimit); + if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) { + if (!rt->peer) + rt_bind_peer(rt, 1); + rc = inet_peer_xrlim_allow(rt->peer, + net->ipv4.sysctl_icmp_ratelimit); + } out: return rc; } diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index b6513b1..709fbb4 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -513,6 +513,8 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) atomic_set(&p->ip_id_count, secure_ip_id(daddr->a4)); p->tcp_ts_stamp = 0; p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; + p->rate_tokens = 0; + p->rate_last = 0; INIT_LIST_HEAD(&p->unused); @@ -580,3 +582,44 @@ void inet_putpeer(struct inet_peer *p) local_bh_enable(); } EXPORT_SYMBOL_GPL(inet_putpeer); + +/* + * Check transmit rate limitation for given message. + * The rate information is held in the inet_peer entries now. + * This function is generic and could be used for other purposes + * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov. 
+ * + * Note that the same inet_peer fields are modified by functions in + * route.c too, but these work for packet destinations while xrlim_allow + * works for icmp destinations. This means the rate limiting information + * for one "ip object" is shared - and these ICMPs are twice limited: + * by source and by destination. + * + * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate + * SHOULD allow setting of rate limits + * + * Shared between ICMPv4 and ICMPv6. + */ +#define XRLIM_BURST_FACTOR 6 +bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout) +{ + unsigned long now, token; + bool rc = false; + + if (!peer) + return true; + + token = peer->rate_tokens; + now = jiffies; + token += now - peer->rate_last; + peer->rate_last = now; + if (token > XRLIM_BURST_FACTOR * timeout) + token = XRLIM_BURST_FACTOR * timeout; + if (token >= timeout) { + token -= timeout; + rc = true; + } + peer->rate_tokens = token; + return rc; +} +EXPORT_SYMBOL(inet_peer_xrlim_allow); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 0ba6a38..2e225da 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1563,6 +1563,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) { struct rtable *rt = skb_rtable(skb); struct in_device *in_dev; + struct inet_peer *peer; int log_martians; rcu_read_lock(); @@ -1574,33 +1575,41 @@ void ip_rt_send_redirect(struct sk_buff *skb) log_martians = IN_DEV_LOG_MARTIANS(in_dev); rcu_read_unlock(); + if (!rt->peer) + rt_bind_peer(rt, 1); + peer = rt->peer; + if (!peer) { + icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); + return; + } + /* No redirected packets during ip_rt_redirect_silence; * reset the algorithm. */ - if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence)) - rt->dst.rate_tokens = 0; + if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) + peer->rate_tokens = 0; /* Too many ignored redirects; do not send anything * set dst.rate_last to the last seen redirected packet. 
*/ - if (rt->dst.rate_tokens >= ip_rt_redirect_number) { - rt->dst.rate_last = jiffies; + if (peer->rate_tokens >= ip_rt_redirect_number) { + peer->rate_last = jiffies; return; } /* Check for load limit; set rate_last to the latest sent * redirect. */ - if (rt->dst.rate_tokens == 0 || + if (peer->rate_tokens == 0 || time_after(jiffies, - (rt->dst.rate_last + - (ip_rt_redirect_load << rt->dst.rate_tokens)))) { + (peer->rate_last + + (ip_rt_redirect_load << peer->rate_tokens)))) { icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); - rt->dst.rate_last = jiffies; - ++rt->dst.rate_tokens; + peer->rate_last = jiffies; + ++peer->rate_tokens; #ifdef CONFIG_IP_ROUTE_VERBOSE if (log_martians && - rt->dst.rate_tokens == ip_rt_redirect_number && + peer->rate_tokens == ip_rt_redirect_number && net_ratelimit()) printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", &rt->rt_src, rt->rt_iif, @@ -1612,7 +1621,9 @@ void ip_rt_send_redirect(struct sk_buff *skb) static int ip_error(struct sk_buff *skb) { struct rtable *rt = skb_rtable(skb); + struct inet_peer *peer; unsigned long now; + bool send; int code; switch (rt->dst.error) { @@ -1632,15 +1643,24 @@ static int ip_error(struct sk_buff *skb) break; } - now = jiffies; - rt->dst.rate_tokens += now - rt->dst.rate_last; - if (rt->dst.rate_tokens > ip_rt_error_burst) - rt->dst.rate_tokens = ip_rt_error_burst; - rt->dst.rate_last = now; - if (rt->dst.rate_tokens >= ip_rt_error_cost) { - rt->dst.rate_tokens -= ip_rt_error_cost; - icmp_send(skb, ICMP_DEST_UNREACH, code, 0); + if (!rt->peer) + rt_bind_peer(rt, 1); + peer = rt->peer; + + send = true; + if (peer) { + now = jiffies; + peer->rate_tokens += now - peer->rate_last; + if (peer->rate_tokens > ip_rt_error_burst) + peer->rate_tokens = ip_rt_error_burst; + peer->rate_last = now; + if (peer->rate_tokens >= ip_rt_error_cost) + peer->rate_tokens -= ip_rt_error_cost; + else + send = false; } + if (send) + icmp_send(skb, ICMP_DEST_UNREACH, code, 0); out: 
kfree_skb(skb); return 0; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 03e62f9..a31d91b 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -157,20 +157,20 @@ static int is_ineligible(struct sk_buff *skb) /* * Check the ICMP output rate limit */ -static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type, - struct flowi *fl) +static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type, + struct flowi *fl) { struct dst_entry *dst; struct net *net = sock_net(sk); - int res = 0; + bool res = false; /* Informational messages are not limited. */ if (type & ICMPV6_INFOMSG_MASK) - return 1; + return true; /* Do not limit pmtu discovery, it would break it. */ if (type == ICMPV6_PKT_TOOBIG) - return 1; + return true; /* * Look up the output route. @@ -182,7 +182,7 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type, IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) { - res = 1; + res = true; } else { struct rt6_info *rt = (struct rt6_info *)dst; int tmo = net->ipv6.sysctl.icmpv6_time; @@ -191,7 +191,9 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type, if (rt->rt6i_dst.plen < 128) tmo >>= ((128 - rt->rt6i_dst.plen)>>5); - res = xrlim_allow(dst, tmo); + if (!rt->rt6i_peer) + rt6_bind_peer(rt, 1); + res = inet_peer_xrlim_allow(rt->rt6i_peer, tmo); } dst_release(dst); return res; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5f8d242..2600e22 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -479,10 +479,13 @@ int ip6_forward(struct sk_buff *skb) else target = &hdr->daddr; + if (!rt->rt6i_peer) + rt6_bind_peer(rt, 1); + /* Limit redirects both by destination (here) and by source (inside ndisc_send_redirect) */ - if (xrlim_allow(dst, 1*HZ)) + if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ)) ndisc_send_redirect(skb, n, target); } else { int addrtype = ipv6_addr_type(&hdr->saddr); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 
2342545..7254ce3 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1553,7 +1553,9 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, "ICMPv6 Redirect: destination is not a neighbour.\n"); goto release; } - if (!xrlim_allow(dst, 1*HZ)) + if (!rt->rt6i_peer) + rt6_bind_peer(rt, 1); + if (!inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ)) goto release; if (dev->addr_len) { -- cgit v1.1 From 7c9989a76e62ceca90e5f31f8920fd6b7b8b6525 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 7 Feb 2011 11:38:55 +0300 Subject: IPVS: precedence bug in ip_vs_sync_switch_mode() '!' has higher precedence than '&'. IP_VS_STATE_MASTER is 0x1 so the original code is equivalent to if (!ipvs->sync_state) ... Signed-off-by: Dan Carpenter Signed-off-by: Hans Schillstrom Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 2a2a836..d1b7298 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -392,7 +392,7 @@ void ip_vs_sync_switch_mode(struct net *net, int mode) { struct netns_ipvs *ipvs = net_ipvs(net); - if (!ipvs->sync_state & IP_VS_STATE_MASTER) + if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) return; if (mode == ipvs->sysctl_sync_ver || !ipvs->sync_buff) return; -- cgit v1.1 From 180205bdb22b79cd7b2a07a5002dd747badc82f3 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Fri, 4 Feb 2011 15:30:24 -0800 Subject: mac80211: Make some mlme timers module paramaters. This allows users to tune the connection-loss algorithms to be more or less lenient. In particular, larger null-func retries helps when using lots of virtual stations on a loaded network. Signed-off-by: Ben Greear Signed-off-by: John W.
Linville --- net/mac80211/mlme.c | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e059b3a..f77adf1 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -28,8 +28,15 @@ #include "rate.h" #include "led.h" -#define IEEE80211_MAX_NULLFUNC_TRIES 2 -#define IEEE80211_MAX_PROBE_TRIES 5 +static int max_nullfunc_tries = 2; +module_param(max_nullfunc_tries, int, 0644); +MODULE_PARM_DESC(max_nullfunc_tries, + "Maximum nullfunc tx tries before disconnecting (reason 4)."); + +static int max_probe_tries = 5; +module_param(max_probe_tries, int, 0644); +MODULE_PARM_DESC(max_probe_tries, + "Maximum probe tries before disconnecting (reason 4)."); /* * Beacon loss timeout is calculated as N frames times the @@ -51,7 +58,11 @@ * a probe request because of beacon loss or for * checking the connection still works. */ -#define IEEE80211_PROBE_WAIT (HZ / 2) +static int probe_wait_ms = 500; +module_param(probe_wait_ms, int, 0644); +MODULE_PARM_DESC(probe_wait_ms, + "Maximum time(ms) to wait for probe response" + " before disconnecting (reason 4)."); /* * Weight given to the latest Beacon frame when calculating average signal @@ -1116,7 +1127,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; const u8 *ssid; u8 *dst = ifmgd->associated->bssid; - u8 unicast_limit = max(1, IEEE80211_MAX_PROBE_TRIES - 3); + u8 unicast_limit = max(1, max_probe_tries - 3); /* * Try sending broadcast probe requests for the last three @@ -1142,7 +1153,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) } ifmgd->probe_send_count++; - ifmgd->probe_timeout = jiffies + IEEE80211_PROBE_WAIT; + ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms); run_again(ifmgd, ifmgd->probe_timeout); } @@ -1243,7 +1254,8 @@ static void __ieee80211_connection_loss(struct 
ieee80211_sub_if_data *sdata) memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN); - printk(KERN_DEBUG "Connection to AP %pM lost.\n", bssid); + printk(KERN_DEBUG "%s: Connection to AP %pM lost.\n", + sdata->name, bssid); ieee80211_set_disassoc(sdata, true, true); mutex_unlock(&ifmgd->mtx); @@ -1988,9 +2000,9 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN); if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) - max_tries = IEEE80211_MAX_NULLFUNC_TRIES; + max_tries = max_nullfunc_tries; else - max_tries = IEEE80211_MAX_PROBE_TRIES; + max_tries = max_probe_tries; /* ACK received for nullfunc probing frame */ if (!ifmgd->probe_send_count) @@ -2022,7 +2034,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) "%s: Failed to send nullfunc to AP %pM" " after %dms, disconnecting.\n", sdata->name, - bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ); + bssid, probe_wait_ms); #endif ieee80211_sta_connection_lost(sdata, bssid); } else if (ifmgd->probe_send_count < max_tries) { @@ -2031,7 +2043,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) "%s: No probe response from AP %pM" " after %dms, try %d/%i\n", sdata->name, - bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ, + bssid, probe_wait_ms, ifmgd->probe_send_count, max_tries); #endif ieee80211_mgd_probe_ap_send(sdata); @@ -2044,7 +2056,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) "%s: No probe response from AP %pM" " after %dms, disconnecting.\n", sdata->name, - bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ); + bssid, probe_wait_ms); ieee80211_sta_connection_lost(sdata, bssid); } -- cgit v1.1 From 4f3123366f78cf34ce7caab923e2b3c4fe9e16c2 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 5 Feb 2011 23:48:37 +0100 Subject: mac80211: as a 4-addr station, do not receive packets for other stations Since 4-addr frames completely override the source address which will make it into the converted 802.3 frames, receiving frames for 
other 4-addr stations will confuse the bridging code. To be able to handle traffic for all connected devices, the bridge code will automatically turn on promiscuous mode, which triggers this problem. Signed-off-by: Felix Fietkau Reported-by: Steve Brown Signed-off-by: John W. Linville --- net/mac80211/rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index b5f59ed..50c2c88 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2646,7 +2646,8 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, return 0; if (!multicast && compare_ether_addr(sdata->vif.addr, hdr->addr1) != 0) { - if (!(sdata->dev->flags & IFF_PROMISC)) + if (!(sdata->dev->flags & IFF_PROMISC) || + sdata->u.mgd.use_4addr) return 0; status->rx_flags &= ~IEEE80211_RX_RA_MATCH; } -- cgit v1.1 From 38f37be20941a6f1931ca4c051e638f947415eab Mon Sep 17 00:00:00 2001 From: Mohammed Shafi Shajakhan Date: Mon, 7 Feb 2011 10:10:04 +0530 Subject: mac80211: Update comments on radiotap MCS index mac80211 now supports passing MCS index to radiotap, so update the comments regarding this Signed-off-by: Mohammed Shafi Shajakhan Signed-off-by: John W. Linville --- net/mac80211/rx.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 50c2c88..045b2fe 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -142,11 +142,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, /* IEEE80211_RADIOTAP_RATE */ if (status->flag & RX_FLAG_HT) { /* - * TODO: add following information into radiotap header once - * suitable fields are defined for it: - * - MCS index (status->rate_idx) - * - HT40 (status->flag & RX_FLAG_40MHZ) - * - short-GI (status->flag & RX_FLAG_SHORT_GI) + * MCS information is a separate field in radiotap, + * added below. 
*/ *pos = 0; } else { -- cgit v1.1 From 3ad97fbcc233a295f2ccc2c6bdeb32323e360a5e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 7 Feb 2011 22:03:35 +0300 Subject: mac80211: remove unneeded check "ap" is the address of sdata->u.ap so it can never be NULL here. Also we dereferenced it on the previous line. I removed the check. Signed-off-by: Dan Carpenter Signed-off-by: John W. Linville --- net/mac80211/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 2915168..38e5939 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2245,7 +2245,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, if (sdata->vif.type == NL80211_IFTYPE_AP) { ap = &sdata->u.ap; beacon = rcu_dereference(ap->beacon); - if (ap && beacon) { + if (beacon) { /* * headroom, head length, * tail length and maximum TIM length -- cgit v1.1 From b2c60d42db0fea1e6c4345739601024863566a13 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Fri, 14 Jan 2011 00:18:49 +0100 Subject: Bluetooth: Fix failure to release lock in read_index_list() If alloc_skb() fails in read_index_list() we'll return -ENOMEM without releasing 'hci_dev_list_lock'. Signed-off-by: Jesper Juhl Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/mgmt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f827fd9..ace8726 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -111,8 +111,10 @@ static int read_index_list(struct sock *sk) body_len = sizeof(*ev) + sizeof(*rp) + (2 * count); skb = alloc_skb(sizeof(*hdr) + body_len, GFP_ATOMIC); - if (!skb) + if (!skb) { + read_unlock(&hci_dev_list_lock); return -ENOMEM; + } hdr = (void *) skb_put(skb, sizeof(*hdr)); hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); -- cgit v1.1 From e702112ff68a554bcac16bb03ddc2b8e5425bcbf Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Mon, 3 Jan 2011 11:14:36 +0200 Subject: Bluetooth: Use non-flushable by default L2CAP data packets Modification of Nick Pelly patch. With Bluetooth 2.1 ACL packets can be flushable or non-flushable. This commit makes ACL data packets non-flushable by default on compatible chipsets, and adds the BT_FLUSHABLE socket option to explicitly request flushable ACL data packets for a given L2CAP socket. This is useful for A2DP data which can be safely discarded if it can not be delivered within a short time (while other ACL data should not be discarded). Note that making ACL data flushable has no effect unless the automatic flush timeout for that ACL link is changed from its default of 0 (infinite). Default packet types (for compatible chipsets): Frame 34: 13 bytes on wire (104 bits), 13 bytes captured (104 bits) Bluetooth HCI H4 Bluetooth HCI ACL Packet .... 0000 0000 0010 = Connection Handle: 0x0002 ..00 .... .... .... = PB Flag: First Non-automatically Flushable Packet (0) 00.. .... .... .... 
= BC Flag: Point-To-Point (0) Data Total Length: 8 Bluetooth L2CAP Packet After setting BT_FLUSHABLE (sock.setsockopt(274 /*SOL_BLUETOOTH*/, 8 /* BT_FLUSHABLE */, 1 /* flush */)) Frame 34: 13 bytes on wire (104 bits), 13 bytes captured (104 bits) Bluetooth HCI H4 Bluetooth HCI ACL Packet .... 0000 0000 0010 = Connection Handle: 0x0002 ..10 .... .... .... = PB Flag: First Automatically Flushable Packet (2) 00.. .... .... .... = BC Flag: Point-To-Point (0) Data Total Length: 8 Bluetooth L2CAP Packet Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_core.c | 7 ++++-- net/bluetooth/l2cap.c | 59 ++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 60 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 9c4541b..9ba92ad 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1395,7 +1395,7 @@ void hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags) skb->dev = (void *) hdev; bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT; - hci_add_acl_hdr(skb, conn->handle, flags | ACL_START); + hci_add_acl_hdr(skb, conn->handle, flags); list = skb_shinfo(skb)->frag_list; if (!list) { @@ -1413,12 +1413,15 @@ void hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags) spin_lock_bh(&conn->data_q.lock); __skb_queue_tail(&conn->data_q, skb); + + flags &= ~ACL_START; + flags |= ACL_CONT; do { skb = list; list = list->next; skb->dev = (void *) hdev; bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT; - hci_add_acl_hdr(skb, conn->handle, flags | ACL_CONT); + hci_add_acl_hdr(skb, conn->handle, flags); BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len); diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 675614e..4bf98df 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -373,13 +373,19 @@ static inline u8 l2cap_get_ident(struct l2cap_conn *conn) static inline void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 
code, u16 len, void *data) { struct sk_buff *skb = l2cap_build_cmd(conn, code, ident, len, data); + u8 flags; BT_DBG("code 0x%2.2x", code); if (!skb) return; - hci_send_acl(conn->hcon, skb, 0); + if (lmp_no_flush_capable(conn->hcon->hdev)) + flags = ACL_START_NO_FLUSH; + else + flags = ACL_START; + + hci_send_acl(conn->hcon, skb, flags); } static inline void l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control) @@ -389,6 +395,7 @@ static inline void l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control) struct l2cap_conn *conn = pi->conn; struct sock *sk = (struct sock *)pi; int count, hlen = L2CAP_HDR_SIZE + 2; + u8 flags; if (sk->sk_state != BT_CONNECTED) return; @@ -425,7 +432,12 @@ static inline void l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control) put_unaligned_le16(fcs, skb_put(skb, 2)); } - hci_send_acl(pi->conn->hcon, skb, 0); + if (lmp_no_flush_capable(conn->hcon->hdev)) + flags = ACL_START_NO_FLUSH; + else + flags = ACL_START; + + hci_send_acl(pi->conn->hcon, skb, flags); } static inline void l2cap_send_rr_or_rnr(struct l2cap_pinfo *pi, u16 control) @@ -912,6 +924,7 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) pi->sec_level = l2cap_pi(parent)->sec_level; pi->role_switch = l2cap_pi(parent)->role_switch; pi->force_reliable = l2cap_pi(parent)->force_reliable; + pi->flushable = l2cap_pi(parent)->flushable; } else { pi->imtu = L2CAP_DEFAULT_MTU; pi->omtu = 0; @@ -927,6 +940,7 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) pi->sec_level = BT_SECURITY_LOW; pi->role_switch = 0; pi->force_reliable = 0; + pi->flushable = BT_FLUSHABLE_OFF; } /* Default config options */ @@ -1431,10 +1445,17 @@ static void l2cap_drop_acked_frames(struct sock *sk) static inline void l2cap_do_send(struct sock *sk, struct sk_buff *skb) { struct l2cap_pinfo *pi = l2cap_pi(sk); + struct hci_conn *hcon = pi->conn->hcon; + u16 flags; BT_DBG("sk %p, skb %p len %d", sk, skb, skb->len); - hci_send_acl(pi->conn->hcon, skb, 0); + if 
(!pi->flushable && lmp_no_flush_capable(hcon->hdev)) + flags = ACL_START_NO_FLUSH; + else + flags = ACL_START; + + hci_send_acl(hcon, skb, flags); } static void l2cap_streaming_send(struct sock *sk) @@ -2079,6 +2100,30 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch bt_sk(sk)->defer_setup = opt; break; + case BT_FLUSHABLE: + if (get_user(opt, (u32 __user *) optval)) { + err = -EFAULT; + break; + } + + if (opt > BT_FLUSHABLE_ON) { + err = -EINVAL; + break; + } + + if (opt == BT_FLUSHABLE_OFF) { + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + /* proceed futher only when we have l2cap_conn and + No Flush support in the LM */ + if (!conn || !lmp_no_flush_capable(conn->hcon->hdev)) { + err = -EINVAL; + break; + } + } + + l2cap_pi(sk)->flushable = opt; + break; + default: err = -ENOPROTOOPT; break; @@ -2218,6 +2263,12 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, ch break; + case BT_FLUSHABLE: + if (put_user(l2cap_pi(sk)->flushable, (u32 __user *) optval)) + err = -EFAULT; + + break; + default: err = -ENOPROTOOPT; break; @@ -4678,7 +4729,7 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl BT_DBG("conn %p len %d flags 0x%x", conn, skb->len, flags); - if (flags & ACL_START) { + if (!(flags & ACL_CONT)) { struct l2cap_hdr *hdr; struct sock *sk; u16 cid; -- cgit v1.1 From 7990681c409e8a31eac122342e64da6c3b77a249 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Mon, 24 Jan 2011 16:01:43 -0200 Subject: Bluetooth: Fix setting of MTU for ERTM and Streaming Mode The desired MTU should be sent in an Config_Req for all modes. Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/l2cap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 4bf98df..cbaa740 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -2569,11 +2569,11 @@ static int l2cap_build_conf_req(struct sock *sk, void *data) } done: + if (pi->imtu != L2CAP_DEFAULT_MTU) + l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->imtu); + switch (pi->mode) { case L2CAP_MODE_BASIC: - if (pi->imtu != L2CAP_DEFAULT_MTU) - l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->imtu); - if (!(pi->conn->feat_mask & L2CAP_FEAT_ERTM) && !(pi->conn->feat_mask & L2CAP_FEAT_STREAMING)) break; -- cgit v1.1 From ab81cbf99c881ca2b9a83682a8722fc84b2483d2 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 15 Dec 2010 13:53:18 +0200 Subject: Bluetooth: Implement automatic setup procedure for local adapters This patch implements automatic initialization of basic information about newly registered Bluetooth adapters. E.g. the address and features are always needed so it makes sense for the kernel to automatically power on adapters and read this information. A new HCI_SETUP flag is added to track this state. In order to not consume unnecessary amounts of power if there isn't a user space available that could switch the adapter back off, a timer is added to do this automatically as long as no Bluetooth user space seems to be present. A new HCI_AUTO_OFF flag is added that user space needs to clear to avoid the automatic power off. Additionally, the management interface index_added event is moved to the end of the HCI_SETUP stage so a user space supporting the management interface has all the necessary information available for fetching when it gets notified of a new adapter.
The HCI_DEV_REG event is kept in the same place as before since existing HCI raw socket based user space versions depend on seeing the kernels initialization sequence (hci_init_req) to determine when the adapter is ready for use. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_core.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++-- net/bluetooth/mgmt.c | 8 ++++++ 2 files changed, 70 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 9ba92ad..b22ce9f 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -50,6 +50,8 @@ #include #include +#define AUTO_OFF_TIMEOUT 2000 + static void hci_cmd_task(unsigned long arg); static void hci_rx_task(unsigned long arg); static void hci_tx_task(unsigned long arg); @@ -794,6 +796,7 @@ int hci_get_dev_list(void __user *arg) list_for_each(p, &hci_dev_list) { struct hci_dev *hdev; hdev = list_entry(p, struct hci_dev, list); + hci_del_off_timer(hdev); (dr + n)->dev_id = hdev->id; (dr + n)->dev_opt = hdev->flags; if (++n >= dev_num) @@ -823,6 +826,8 @@ int hci_get_dev_info(void __user *arg) if (!hdev) return -ENODEV; + hci_del_off_timer(hdev); + strcpy(di.name, hdev->name); di.bdaddr = hdev->bdaddr; di.type = (hdev->bus & 0x0f) | (hdev->dev_type << 4); @@ -891,6 +896,51 @@ void hci_free_dev(struct hci_dev *hdev) } EXPORT_SYMBOL(hci_free_dev); +static void hci_power_on(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, power_on); + + BT_DBG("%s", hdev->name); + + if (hci_dev_open(hdev->id) < 0) + return; + + if (test_bit(HCI_AUTO_OFF, &hdev->flags)) + mod_timer(&hdev->off_timer, + jiffies + msecs_to_jiffies(AUTO_OFF_TIMEOUT)); + + if (test_and_clear_bit(HCI_SETUP, &hdev->flags)) + mgmt_index_added(hdev->id); +} + +static void hci_power_off(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, power_off); + + BT_DBG("%s", hdev->name); + + 
hci_dev_close(hdev->id); +} + +static void hci_auto_off(unsigned long data) +{ + struct hci_dev *hdev = (struct hci_dev *) data; + + BT_DBG("%s", hdev->name); + + clear_bit(HCI_AUTO_OFF, &hdev->flags); + + queue_work(hdev->workqueue, &hdev->power_off); +} + +void hci_del_off_timer(struct hci_dev *hdev) +{ + BT_DBG("%s", hdev->name); + + clear_bit(HCI_AUTO_OFF, &hdev->flags); + del_timer(&hdev->off_timer); +} + /* Register HCI device */ int hci_register_dev(struct hci_dev *hdev) { @@ -948,6 +998,10 @@ int hci_register_dev(struct hci_dev *hdev) INIT_LIST_HEAD(&hdev->blacklist); + INIT_WORK(&hdev->power_on, hci_power_on); + INIT_WORK(&hdev->power_off, hci_power_off); + setup_timer(&hdev->off_timer, hci_auto_off, (unsigned long) hdev); + memset(&hdev->stat, 0, sizeof(struct hci_dev_stats)); atomic_set(&hdev->promisc, 0); @@ -969,7 +1023,10 @@ int hci_register_dev(struct hci_dev *hdev) } } - mgmt_index_added(hdev->id); + set_bit(HCI_AUTO_OFF, &hdev->flags); + set_bit(HCI_SETUP, &hdev->flags); + queue_work(hdev->workqueue, &hdev->power_on); + hci_notify(hdev, HCI_DEV_REG); return id; @@ -999,7 +1056,10 @@ int hci_unregister_dev(struct hci_dev *hdev) for (i = 0; i < NUM_REASSEMBLY; i++) kfree_skb(hdev->reassembly[i]); - mgmt_index_removed(hdev->id); + if (!test_bit(HCI_INIT, &hdev->flags) && + !test_bit(HCI_SETUP, &hdev->flags)) + mgmt_index_removed(hdev->id); + hci_notify(hdev, HCI_DEV_UNREG); if (hdev->rfkill) { diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index ace8726..d479e24 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -129,6 +129,12 @@ static int read_index_list(struct sock *sk) i = 0; list_for_each(p, &hci_dev_list) { struct hci_dev *d = list_entry(p, struct hci_dev, list); + + hci_del_off_timer(d); + + if (test_bit(HCI_SETUP, &d->flags)) + continue; + put_unaligned_le16(d->id, &rp->index[i++]); BT_DBG("Added hci%u", d->id); } @@ -180,6 +186,8 @@ static int read_controller_info(struct sock *sk, unsigned char *data, u16 len) return 
cmd_status(sk, MGMT_OP_READ_INFO, ENODEV); } + hci_del_off_timer(hdev); + hci_dev_lock_bh(hdev); put_unaligned_le16(hdev->id, &rp->index); -- cgit v1.1 From 5add6af8fcbce269cac2457584c0ebfda055474a Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 16 Dec 2010 10:00:37 +0200 Subject: Bluetooth: Add support for management powered event This patch adds support for the powered event that's used to indicate to userspace when the powered state of a local adapter changes. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_core.c | 4 ++++ net/bluetooth/mgmt.c | 10 ++++++++++ 2 files changed, 14 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index b22ce9f..c5a78e7 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -535,6 +535,8 @@ int hci_dev_open(__u16 dev) hci_dev_hold(hdev); set_bit(HCI_UP, &hdev->flags); hci_notify(hdev, HCI_DEV_UP); + if (!test_bit(HCI_SETUP, &hdev->flags)) + mgmt_powered(hdev->id, 1); } else { /* Init failed, cleanup */ tasklet_kill(&hdev->rx_task); @@ -616,6 +618,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) * and no tasks are scheduled. 
*/ hdev->close(hdev); + mgmt_powered(hdev->id, 0); + /* Clear flags */ hdev->flags = 0; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index d479e24..f746e19 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -316,3 +316,13 @@ int mgmt_index_removed(u16 index) return mgmt_event(MGMT_EV_INDEX_REMOVED, &ev, sizeof(ev)); } + +int mgmt_powered(u16 index, u8 powered) +{ + struct mgmt_ev_powered ev; + + put_unaligned_le16(index, &ev.index); + ev.powered = powered; + + return mgmt_event(MGMT_EV_POWERED, &ev, sizeof(ev)); +} -- cgit v1.1 From eec8d2bcc841ae44edcde9660ff21144a2016053 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 16 Dec 2010 10:17:38 +0200 Subject: Bluetooth: Add support for set_powered management command This patch adds a set_powered command to the management interface through which the powered state of local adapters can be controlled. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_core.c | 4 +- net/bluetooth/hci_event.c | 2 +- net/bluetooth/hci_sock.c | 6 +- net/bluetooth/mgmt.c | 200 ++++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 203 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index c5a78e7..dfc4ef9 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1377,7 +1377,7 @@ static int hci_send_frame(struct sk_buff *skb) /* Time stamp */ __net_timestamp(skb); - hci_send_to_sock(hdev, skb); + hci_send_to_sock(hdev, skb, NULL); } /* Get rid of skb owner, prior to sending to the driver. 
*/ @@ -1767,7 +1767,7 @@ static void hci_rx_task(unsigned long arg) while ((skb = skb_dequeue(&hdev->rx_q))) { if (atomic_read(&hdev->promisc)) { /* Send copy to the sockets */ - hci_send_to_sock(hdev, skb); + hci_send_to_sock(hdev, skb, NULL); } if (test_bit(HCI_RAW, &hdev->flags)) { diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index a290854..d42fb35 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2083,6 +2083,6 @@ void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data) bt_cb(skb)->pkt_type = HCI_EVENT_PKT; skb->dev = (void *) hdev; - hci_send_to_sock(hdev, skb); + hci_send_to_sock(hdev, skb, NULL); kfree_skb(skb); } diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 29827c7..d50e961 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -85,7 +85,8 @@ static struct bt_sock_list hci_sk_list = { }; /* Send frame to RAW socket */ -void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) +void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb, + struct sock *skip_sk) { struct sock *sk; struct hlist_node *node; @@ -97,6 +98,9 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) struct hci_filter *flt; struct sk_buff *nskb; + if (sk == skip_sk) + continue; + if (sk->sk_state != BT_BOUND || hci_pi(sk)->hdev != hdev) continue; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f746e19..b65b6ca 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -32,6 +32,16 @@ #define MGMT_VERSION 0 #define MGMT_REVISION 1 +struct pending_cmd { + struct list_head list; + __u16 opcode; + int index; + void *cmd; + struct sock *sk; +}; + +LIST_HEAD(cmd_list); + static int cmd_status(struct sock *sk, u16 cmd, u8 status) { struct sk_buff *skb; @@ -220,6 +230,129 @@ static int read_controller_info(struct sock *sk, unsigned char *data, u16 len) return 0; } +static void mgmt_pending_free(struct pending_cmd *cmd) +{ + sock_put(cmd->sk); + 
kfree(cmd->cmd); + kfree(cmd); +} + +static int mgmt_pending_add(struct sock *sk, u16 opcode, int index, + void *data, u16 len) +{ + struct pending_cmd *cmd; + + cmd = kmalloc(sizeof(*cmd), GFP_ATOMIC); + if (!cmd) + return -ENOMEM; + + cmd->opcode = opcode; + cmd->index = index; + + cmd->cmd = kmalloc(len, GFP_ATOMIC); + if (!cmd->cmd) { + kfree(cmd); + return -ENOMEM; + } + + memcpy(cmd->cmd, data, len); + + cmd->sk = sk; + sock_hold(sk); + + list_add(&cmd->list, &cmd_list); + + return 0; +} + +static void mgmt_pending_foreach(u16 opcode, int index, + void (*cb)(struct pending_cmd *cmd, void *data), + void *data) +{ + struct list_head *p, *n; + + list_for_each_safe(p, n, &cmd_list) { + struct pending_cmd *cmd; + + cmd = list_entry(p, struct pending_cmd, list); + + if (cmd->opcode != opcode) + continue; + + if (index >= 0 && cmd->index != index) + continue; + + cb(cmd, data); + } +} + +static struct pending_cmd *mgmt_pending_find(u16 opcode, int index) +{ + struct list_head *p; + + list_for_each(p, &cmd_list) { + struct pending_cmd *cmd; + + cmd = list_entry(p, struct pending_cmd, list); + + if (cmd->opcode != opcode) + continue; + + if (index >= 0 && cmd->index != index) + continue; + + return cmd; + } + + return NULL; +} + +static int set_powered(struct sock *sk, unsigned char *data, u16 len) +{ + struct mgmt_cp_set_powered *cp; + struct hci_dev *hdev; + u16 dev_id; + int ret, up; + + cp = (void *) data; + dev_id = get_unaligned_le16(&cp->index); + + BT_DBG("request for hci%u", dev_id); + + hdev = hci_dev_get(dev_id); + if (!hdev) + return cmd_status(sk, MGMT_OP_SET_POWERED, ENODEV); + + hci_dev_lock_bh(hdev); + + up = test_bit(HCI_UP, &hdev->flags); + if ((cp->powered && up) || (!cp->powered && !up)) { + ret = cmd_status(sk, MGMT_OP_SET_POWERED, EALREADY); + goto failed; + } + + if (mgmt_pending_find(MGMT_OP_SET_POWERED, dev_id)) { + ret = cmd_status(sk, MGMT_OP_SET_POWERED, EBUSY); + goto failed; + } + + ret = mgmt_pending_add(sk, MGMT_OP_SET_POWERED, dev_id, 
data, len); + if (ret < 0) + goto failed; + + if (cp->powered) + queue_work(hdev->workqueue, &hdev->power_on); + else + queue_work(hdev->workqueue, &hdev->power_off); + + ret = 0; + +failed: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + return ret; +} + int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) { unsigned char *buf; @@ -260,6 +393,9 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) case MGMT_OP_READ_INFO: err = read_controller_info(sk, buf + sizeof(*hdr), len); break; + case MGMT_OP_SET_POWERED: + err = set_powered(sk, buf + sizeof(*hdr), len); + break; default: BT_DBG("Unknown op %u", opcode); err = cmd_status(sk, opcode, 0x01); @@ -276,7 +412,7 @@ done: return err; } -static int mgmt_event(u16 event, void *data, u16 data_len) +static int mgmt_event(u16 event, void *data, u16 data_len, struct sock *skip_sk) { struct sk_buff *skb; struct mgmt_hdr *hdr; @@ -293,7 +429,7 @@ static int mgmt_event(u16 event, void *data, u16 data_len) memcpy(skb_put(skb, data_len), data, data_len); - hci_send_to_sock(NULL, skb); + hci_send_to_sock(NULL, skb, skip_sk); kfree_skb(skb); return 0; @@ -305,7 +441,7 @@ int mgmt_index_added(u16 index) put_unaligned_le16(index, &ev.index); - return mgmt_event(MGMT_EV_INDEX_ADDED, &ev, sizeof(ev)); + return mgmt_event(MGMT_EV_INDEX_ADDED, &ev, sizeof(ev), NULL); } int mgmt_index_removed(u16 index) @@ -314,15 +450,69 @@ int mgmt_index_removed(u16 index) put_unaligned_le16(index, &ev.index); - return mgmt_event(MGMT_EV_INDEX_REMOVED, &ev, sizeof(ev)); + return mgmt_event(MGMT_EV_INDEX_REMOVED, &ev, sizeof(ev), NULL); +} + +struct powered_lookup { + u8 powered; + struct sock *sk; +}; + +static void power_rsp(struct pending_cmd *cmd, void *data) +{ + struct mgmt_hdr *hdr; + struct mgmt_ev_cmd_complete *ev; + struct mgmt_rp_set_powered *rp; + struct mgmt_cp_set_powered *cp = cmd->cmd; + struct sk_buff *skb; + struct powered_lookup *match = data; + + if (cp->powered != match->powered) + return; + + 
skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC); + if (!skb) + return; + + hdr = (void *) skb_put(skb, sizeof(*hdr)); + hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); + hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(*rp)); + + ev = (void *) skb_put(skb, sizeof(*ev)); + put_unaligned_le16(cmd->opcode, &ev->opcode); + + rp = (void *) skb_put(skb, sizeof(*rp)); + put_unaligned_le16(cmd->index, &rp->index); + rp->powered = cp->powered; + + if (sock_queue_rcv_skb(cmd->sk, skb) < 0) + kfree_skb(skb); + + list_del(&cmd->list); + + if (match->sk == NULL) { + match->sk = cmd->sk; + sock_hold(match->sk); + } + + mgmt_pending_free(cmd); } int mgmt_powered(u16 index, u8 powered) { struct mgmt_ev_powered ev; + struct powered_lookup match = { powered, NULL }; + int ret; put_unaligned_le16(index, &ev.index); ev.powered = powered; - return mgmt_event(MGMT_EV_POWERED, &ev, sizeof(ev)); + mgmt_pending_foreach(MGMT_OP_SET_POWERED, index, power_rsp, &match); + + ret = mgmt_event(MGMT_EV_POWERED, &ev, sizeof(ev), match.sk); + + if (match.sk) + sock_put(match.sk); + + return ret; } -- cgit v1.1 From 73f22f62388795c0f6b4f3f97bda7a64f9681aac Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 29 Dec 2010 16:00:25 +0200 Subject: Bluetooth: Add support for set_discoverable management command This patch adds a set_discoverable command to the management interface as well as the corresponding event. The command is used to control the discoverable state of adapters. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/hci_event.c | 5 +- net/bluetooth/mgmt.c | 142 ++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 141 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index d42fb35..f55004a 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -278,8 +278,11 @@ static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) clear_bit(HCI_PSCAN, &hdev->flags); clear_bit(HCI_ISCAN, &hdev->flags); - if (param & SCAN_INQUIRY) + if (param & SCAN_INQUIRY) { set_bit(HCI_ISCAN, &hdev->flags); + mgmt_discoverable(hdev->id, 1); + } else + mgmt_discoverable(hdev->id, 0); if (param & SCAN_PAGE) set_bit(HCI_PSCAN, &hdev->flags); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index b65b6ca..5fa3034 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -307,6 +307,18 @@ static struct pending_cmd *mgmt_pending_find(u16 opcode, int index) return NULL; } +static void mgmt_pending_remove(u16 opcode, int index) +{ + struct pending_cmd *cmd; + + cmd = mgmt_pending_find(opcode, index); + if (cmd == NULL) + return; + + list_del(&cmd->list); + mgmt_pending_free(cmd); +} + static int set_powered(struct sock *sk, unsigned char *data, u16 len) { struct mgmt_cp_set_powered *cp; @@ -353,6 +365,63 @@ failed: return ret; } +static int set_discoverable(struct sock *sk, unsigned char *data, u16 len) +{ + struct mgmt_cp_set_discoverable *cp; + struct hci_dev *hdev; + u16 dev_id; + u8 scan; + int err; + + cp = (void *) data; + dev_id = get_unaligned_le16(&cp->index); + + BT_DBG("request for hci%u", dev_id); + + hdev = hci_dev_get(dev_id); + if (!hdev) + return cmd_status(sk, MGMT_OP_SET_DISCOVERABLE, ENODEV); + + hci_dev_lock_bh(hdev); + + if (!test_bit(HCI_UP, &hdev->flags)) { + err = cmd_status(sk, MGMT_OP_SET_DISCOVERABLE, ENETDOWN); + goto failed; + } + + if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, dev_id) || + 
mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, dev_id) || + hci_sent_cmd_data(hdev, HCI_OP_WRITE_SCAN_ENABLE)) { + err = cmd_status(sk, MGMT_OP_SET_DISCOVERABLE, EBUSY); + goto failed; + } + + if (cp->discoverable == test_bit(HCI_ISCAN, &hdev->flags) && + test_bit(HCI_PSCAN, &hdev->flags)) { + err = cmd_status(sk, MGMT_OP_SET_DISCOVERABLE, EALREADY); + goto failed; + } + + err = mgmt_pending_add(sk, MGMT_OP_SET_DISCOVERABLE, dev_id, data, len); + if (err < 0) + goto failed; + + scan = SCAN_PAGE; + + if (cp->discoverable) + scan |= SCAN_INQUIRY; + + err = hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + if (err < 0) + mgmt_pending_remove(MGMT_OP_SET_DISCOVERABLE, dev_id); + +failed: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; +} + int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) { unsigned char *buf; @@ -396,6 +465,9 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) case MGMT_OP_SET_POWERED: err = set_powered(sk, buf + sizeof(*hdr), len); break; + case MGMT_OP_SET_DISCOVERABLE: + err = set_discoverable(sk, buf + sizeof(*hdr), len); + break; default: BT_DBG("Unknown op %u", opcode); err = cmd_status(sk, opcode, 0x01); @@ -453,8 +525,8 @@ int mgmt_index_removed(u16 index) return mgmt_event(MGMT_EV_INDEX_REMOVED, &ev, sizeof(ev), NULL); } -struct powered_lookup { - u8 powered; +struct cmd_lookup { + u8 value; struct sock *sk; }; @@ -465,9 +537,9 @@ static void power_rsp(struct pending_cmd *cmd, void *data) struct mgmt_rp_set_powered *rp; struct mgmt_cp_set_powered *cp = cmd->cmd; struct sk_buff *skb; - struct powered_lookup *match = data; + struct cmd_lookup *match = data; - if (cp->powered != match->powered) + if (cp->powered != match->value) return; skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC); @@ -501,7 +573,7 @@ static void power_rsp(struct pending_cmd *cmd, void *data) int mgmt_powered(u16 index, u8 powered) { struct mgmt_ev_powered ev; - struct powered_lookup match = { 
powered, NULL }; + struct cmd_lookup match = { powered, NULL }; int ret; put_unaligned_le16(index, &ev.index); @@ -516,3 +588,63 @@ int mgmt_powered(u16 index, u8 powered) return ret; } + +static void discoverable_rsp(struct pending_cmd *cmd, void *data) +{ + struct mgmt_cp_set_discoverable *cp = cmd->cmd; + struct cmd_lookup *match = data; + struct sk_buff *skb; + struct mgmt_hdr *hdr; + struct mgmt_ev_cmd_complete *ev; + struct mgmt_rp_set_discoverable *rp; + + if (cp->discoverable != match->value) + return; + + skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC); + if (!skb) + return; + + hdr = (void *) skb_put(skb, sizeof(*hdr)); + hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); + hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(*rp)); + + ev = (void *) skb_put(skb, sizeof(*ev)); + put_unaligned_le16(MGMT_OP_SET_DISCOVERABLE, &ev->opcode); + + rp = (void *) skb_put(skb, sizeof(*rp)); + put_unaligned_le16(cmd->index, &rp->index); + rp->discoverable = cp->discoverable; + + if (sock_queue_rcv_skb(cmd->sk, skb) < 0) + kfree_skb(skb); + + list_del(&cmd->list); + + if (match->sk == NULL) { + match->sk = cmd->sk; + sock_hold(match->sk); + } + + mgmt_pending_free(cmd); +} + +int mgmt_discoverable(u16 index, u8 discoverable) +{ + struct mgmt_ev_discoverable ev; + struct cmd_lookup match = { discoverable, NULL }; + int ret; + + put_unaligned_le16(index, &ev.index); + ev.discoverable = discoverable; + + mgmt_pending_foreach(MGMT_OP_SET_DISCOVERABLE, index, + discoverable_rsp, &match); + + ret = mgmt_event(MGMT_EV_DISCOVERABLE, &ev, sizeof(ev), match.sk); + + if (match.sk) + sock_put(match.sk); + + return ret; +} -- cgit v1.1 From 9fbcbb455dd01abfad4f314b618ac51d566114cb Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 30 Dec 2010 00:18:33 +0200 Subject: Bluetooth: Add set_connectable management command This patch adds a set_connectable command as well as a corresponding event to the management interface. 
It's mainly useful for setting an adapter as connectable from a non-initialized state as well as setting an already initialized adapter as non-connectable (mostly useful for qualification purposes). Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_event.c | 16 ++++-- net/bluetooth/mgmt.c | 122 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 131 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index f55004a..a8a38f1 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -274,18 +274,24 @@ static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) if (!status) { __u8 param = *((__u8 *) sent); + int old_pscan, old_iscan; - clear_bit(HCI_PSCAN, &hdev->flags); - clear_bit(HCI_ISCAN, &hdev->flags); + old_pscan = test_and_clear_bit(HCI_PSCAN, &hdev->flags); + old_iscan = test_and_clear_bit(HCI_ISCAN, &hdev->flags); if (param & SCAN_INQUIRY) { set_bit(HCI_ISCAN, &hdev->flags); - mgmt_discoverable(hdev->id, 1); - } else + if (!old_iscan) + mgmt_discoverable(hdev->id, 1); + } else if (old_iscan) mgmt_discoverable(hdev->id, 0); - if (param & SCAN_PAGE) + if (param & SCAN_PAGE) { set_bit(HCI_PSCAN, &hdev->flags); + if (!old_pscan) + mgmt_connectable(hdev->id, 1); + } else if (old_pscan) + mgmt_connectable(hdev->id, 0); } hci_req_complete(hdev, HCI_OP_WRITE_SCAN_ENABLE, status); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 5fa3034..fc41cfc 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -204,6 +204,7 @@ static int read_controller_info(struct sock *sk, unsigned char *data, u16 len) rp->type = hdev->dev_type; rp->powered = test_bit(HCI_UP, &hdev->flags); + rp->connectable = test_bit(HCI_PSCAN, &hdev->flags); rp->discoverable = test_bit(HCI_ISCAN, &hdev->flags); rp->pairable = test_bit(HCI_PSCAN, &hdev->flags); @@ -390,8 +391,7 @@ static int set_discoverable(struct sock *sk, unsigned char 
*data, u16 len) } if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, dev_id) || - mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, dev_id) || - hci_sent_cmd_data(hdev, HCI_OP_WRITE_SCAN_ENABLE)) { + mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, dev_id)) { err = cmd_status(sk, MGMT_OP_SET_DISCOVERABLE, EBUSY); goto failed; } @@ -422,6 +422,61 @@ failed: return err; } +static int set_connectable(struct sock *sk, unsigned char *data, u16 len) +{ + struct mgmt_cp_set_connectable *cp; + struct hci_dev *hdev; + u16 dev_id; + u8 scan; + int err; + + cp = (void *) data; + dev_id = get_unaligned_le16(&cp->index); + + BT_DBG("request for hci%u", dev_id); + + hdev = hci_dev_get(dev_id); + if (!hdev) + return cmd_status(sk, MGMT_OP_SET_CONNECTABLE, ENODEV); + + hci_dev_lock_bh(hdev); + + if (!test_bit(HCI_UP, &hdev->flags)) { + err = cmd_status(sk, MGMT_OP_SET_CONNECTABLE, ENETDOWN); + goto failed; + } + + if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, dev_id) || + mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, dev_id)) { + err = cmd_status(sk, MGMT_OP_SET_CONNECTABLE, EBUSY); + goto failed; + } + + if (cp->connectable == test_bit(HCI_PSCAN, &hdev->flags)) { + err = cmd_status(sk, MGMT_OP_SET_CONNECTABLE, EALREADY); + goto failed; + } + + err = mgmt_pending_add(sk, MGMT_OP_SET_CONNECTABLE, dev_id, data, len); + if (err < 0) + goto failed; + + if (cp->connectable) + scan = SCAN_PAGE; + else + scan = 0; + + err = hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + if (err < 0) + mgmt_pending_remove(MGMT_OP_SET_CONNECTABLE, dev_id); + +failed: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; +} + int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) { unsigned char *buf; @@ -468,6 +523,9 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) case MGMT_OP_SET_DISCOVERABLE: err = set_discoverable(sk, buf + sizeof(*hdr), len); break; + case MGMT_OP_SET_CONNECTABLE: + err = set_connectable(sk, buf + sizeof(*hdr), len); + break; default: 
BT_DBG("Unknown op %u", opcode); err = cmd_status(sk, opcode, 0x01); @@ -648,3 +706,63 @@ int mgmt_discoverable(u16 index, u8 discoverable) return ret; } + +static void connectable_rsp(struct pending_cmd *cmd, void *data) +{ + struct mgmt_cp_set_connectable *cp = cmd->cmd; + struct cmd_lookup *match = data; + struct sk_buff *skb; + struct mgmt_hdr *hdr; + struct mgmt_ev_cmd_complete *ev; + struct mgmt_rp_set_connectable *rp; + + if (cp->connectable != match->value) + return; + + skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC); + if (!skb) + return; + + hdr = (void *) skb_put(skb, sizeof(*hdr)); + hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); + hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(*rp)); + + ev = (void *) skb_put(skb, sizeof(*ev)); + put_unaligned_le16(MGMT_OP_SET_CONNECTABLE, &ev->opcode); + + rp = (void *) skb_put(skb, sizeof(*rp)); + put_unaligned_le16(cmd->index, &rp->index); + rp->connectable = cp->connectable; + + if (sock_queue_rcv_skb(cmd->sk, skb) < 0) + kfree_skb(skb); + + list_del(&cmd->list); + + if (match->sk == NULL) { + match->sk = cmd->sk; + sock_hold(match->sk); + } + + mgmt_pending_free(cmd); +} + +int mgmt_connectable(u16 index, u8 connectable) +{ + struct mgmt_ev_connectable ev; + struct cmd_lookup match = { connectable, NULL }; + int ret; + + put_unaligned_le16(index, &ev.index); + ev.connectable = connectable; + + mgmt_pending_foreach(MGMT_OP_SET_CONNECTABLE, index, + connectable_rsp, &match); + + ret = mgmt_event(MGMT_EV_CONNECTABLE, &ev, sizeof(ev), match.sk); + + if (match.sk) + sock_put(match.sk); + + return ret; +} -- cgit v1.1 From 72a734ec1aca8cd2ef3fc85428c11bde662e149e Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 30 Dec 2010 00:38:22 +0200 Subject: Bluetooth: Unify mode related management messages to a single struct The powered, connectable and discoverable messages all have the same format. By using a single struct for all of them a lot of code can be simplified and reused. 
Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. Padovan --- net/bluetooth/mgmt.c | 137 +++++++++++---------------------------------------- 1 file changed, 28 insertions(+), 109 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index fc41cfc..dbb1e57 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -322,7 +322,7 @@ static void mgmt_pending_remove(u16 opcode, int index) static int set_powered(struct sock *sk, unsigned char *data, u16 len) { - struct mgmt_cp_set_powered *cp; + struct mgmt_mode *cp; struct hci_dev *hdev; u16 dev_id; int ret, up; @@ -339,7 +339,7 @@ static int set_powered(struct sock *sk, unsigned char *data, u16 len) hci_dev_lock_bh(hdev); up = test_bit(HCI_UP, &hdev->flags); - if ((cp->powered && up) || (!cp->powered && !up)) { + if ((cp->val && up) || (!cp->val && !up)) { ret = cmd_status(sk, MGMT_OP_SET_POWERED, EALREADY); goto failed; } @@ -353,7 +353,7 @@ static int set_powered(struct sock *sk, unsigned char *data, u16 len) if (ret < 0) goto failed; - if (cp->powered) + if (cp->val) queue_work(hdev->workqueue, &hdev->power_on); else queue_work(hdev->workqueue, &hdev->power_off); @@ -368,7 +368,7 @@ failed: static int set_discoverable(struct sock *sk, unsigned char *data, u16 len) { - struct mgmt_cp_set_discoverable *cp; + struct mgmt_mode *cp; struct hci_dev *hdev; u16 dev_id; u8 scan; @@ -396,7 +396,7 @@ static int set_discoverable(struct sock *sk, unsigned char *data, u16 len) goto failed; } - if (cp->discoverable == test_bit(HCI_ISCAN, &hdev->flags) && + if (cp->val == test_bit(HCI_ISCAN, &hdev->flags) && test_bit(HCI_PSCAN, &hdev->flags)) { err = cmd_status(sk, MGMT_OP_SET_DISCOVERABLE, EALREADY); goto failed; @@ -408,7 +408,7 @@ static int set_discoverable(struct sock *sk, unsigned char *data, u16 len) scan = SCAN_PAGE; - if (cp->discoverable) + if (cp->val) scan |= SCAN_INQUIRY; err = hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); @@ -424,7 +424,7 @@ failed: static int 
set_connectable(struct sock *sk, unsigned char *data, u16 len) { - struct mgmt_cp_set_connectable *cp; + struct mgmt_mode *cp; struct hci_dev *hdev; u16 dev_id; u8 scan; @@ -452,7 +452,7 @@ static int set_connectable(struct sock *sk, unsigned char *data, u16 len) goto failed; } - if (cp->connectable == test_bit(HCI_PSCAN, &hdev->flags)) { + if (cp->val == test_bit(HCI_PSCAN, &hdev->flags)) { err = cmd_status(sk, MGMT_OP_SET_CONNECTABLE, EALREADY); goto failed; } @@ -461,7 +461,7 @@ static int set_connectable(struct sock *sk, unsigned char *data, u16 len) if (err < 0) goto failed; - if (cp->connectable) + if (cp->val) scan = SCAN_PAGE; else scan = 0; @@ -584,20 +584,20 @@ int mgmt_index_removed(u16 index) } struct cmd_lookup { - u8 value; + u8 val; struct sock *sk; }; -static void power_rsp(struct pending_cmd *cmd, void *data) +static void mode_rsp(struct pending_cmd *cmd, void *data) { struct mgmt_hdr *hdr; struct mgmt_ev_cmd_complete *ev; - struct mgmt_rp_set_powered *rp; - struct mgmt_cp_set_powered *cp = cmd->cmd; + struct mgmt_mode *rp; + struct mgmt_mode *cp = cmd->cmd; struct sk_buff *skb; struct cmd_lookup *match = data; - if (cp->powered != match->value) + if (cp->val != match->val) return; skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC); @@ -613,7 +613,7 @@ static void power_rsp(struct pending_cmd *cmd, void *data) rp = (void *) skb_put(skb, sizeof(*rp)); put_unaligned_le16(cmd->index, &rp->index); - rp->powered = cp->powered; + rp->val = cp->val; if (sock_queue_rcv_skb(cmd->sk, skb) < 0) kfree_skb(skb); @@ -630,14 +630,14 @@ static void power_rsp(struct pending_cmd *cmd, void *data) int mgmt_powered(u16 index, u8 powered) { - struct mgmt_ev_powered ev; + struct mgmt_mode ev; struct cmd_lookup match = { powered, NULL }; int ret; - put_unaligned_le16(index, &ev.index); - ev.powered = powered; + mgmt_pending_foreach(MGMT_OP_SET_POWERED, index, mode_rsp, &match); - mgmt_pending_foreach(MGMT_OP_SET_POWERED, index, power_rsp, &match); + 
put_unaligned_le16(index, &ev.index); + ev.val = powered; ret = mgmt_event(MGMT_EV_POWERED, &ev, sizeof(ev), match.sk); @@ -647,57 +647,17 @@ int mgmt_powered(u16 index, u8 powered) return ret; } -static void discoverable_rsp(struct pending_cmd *cmd, void *data) -{ - struct mgmt_cp_set_discoverable *cp = cmd->cmd; - struct cmd_lookup *match = data; - struct sk_buff *skb; - struct mgmt_hdr *hdr; - struct mgmt_ev_cmd_complete *ev; - struct mgmt_rp_set_discoverable *rp; - - if (cp->discoverable != match->value) - return; - - skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC); - if (!skb) - return; - - hdr = (void *) skb_put(skb, sizeof(*hdr)); - hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); - hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(*rp)); - - ev = (void *) skb_put(skb, sizeof(*ev)); - put_unaligned_le16(MGMT_OP_SET_DISCOVERABLE, &ev->opcode); - - rp = (void *) skb_put(skb, sizeof(*rp)); - put_unaligned_le16(cmd->index, &rp->index); - rp->discoverable = cp->discoverable; - - if (sock_queue_rcv_skb(cmd->sk, skb) < 0) - kfree_skb(skb); - - list_del(&cmd->list); - - if (match->sk == NULL) { - match->sk = cmd->sk; - sock_hold(match->sk); - } - - mgmt_pending_free(cmd); -} - int mgmt_discoverable(u16 index, u8 discoverable) { - struct mgmt_ev_discoverable ev; + struct mgmt_mode ev; struct cmd_lookup match = { discoverable, NULL }; int ret; - put_unaligned_le16(index, &ev.index); - ev.discoverable = discoverable; - mgmt_pending_foreach(MGMT_OP_SET_DISCOVERABLE, index, - discoverable_rsp, &match); + mode_rsp, &match); + + put_unaligned_le16(index, &ev.index); + ev.val = discoverable; ret = mgmt_event(MGMT_EV_DISCOVERABLE, &ev, sizeof(ev), match.sk); @@ -707,57 +667,16 @@ int mgmt_discoverable(u16 index, u8 discoverable) return ret; } -static void connectable_rsp(struct pending_cmd *cmd, void *data) -{ - struct mgmt_cp_set_connectable *cp = cmd->cmd; - struct cmd_lookup *match = data; - struct sk_buff *skb; - struct mgmt_hdr *hdr; - struct 
mgmt_ev_cmd_complete *ev; - struct mgmt_rp_set_connectable *rp; - - if (cp->connectable != match->value) - return; - - skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC); - if (!skb) - return; - - hdr = (void *) skb_put(skb, sizeof(*hdr)); - hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); - hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(*rp)); - - ev = (void *) skb_put(skb, sizeof(*ev)); - put_unaligned_le16(MGMT_OP_SET_CONNECTABLE, &ev->opcode); - - rp = (void *) skb_put(skb, sizeof(*rp)); - put_unaligned_le16(cmd->index, &rp->index); - rp->connectable = cp->connectable; - - if (sock_queue_rcv_skb(cmd->sk, skb) < 0) - kfree_skb(skb); - - list_del(&cmd->list); - - if (match->sk == NULL) { - match->sk = cmd->sk; - sock_hold(match->sk); - } - - mgmt_pending_free(cmd); -} - int mgmt_connectable(u16 index, u8 connectable) { - struct mgmt_ev_connectable ev; + struct mgmt_mode ev; struct cmd_lookup match = { connectable, NULL }; int ret; - put_unaligned_le16(index, &ev.index); - ev.connectable = connectable; + mgmt_pending_foreach(MGMT_OP_SET_CONNECTABLE, index, mode_rsp, &match); - mgmt_pending_foreach(MGMT_OP_SET_CONNECTABLE, index, - connectable_rsp, &match); + put_unaligned_le16(index, &ev.index); + ev.val = connectable; ret = mgmt_event(MGMT_EV_CONNECTABLE, &ev, sizeof(ev), match.sk); -- cgit v1.1 From ebc99feba7378349e2bfae7018af062767382f6c Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 4 Jan 2011 11:54:26 +0200 Subject: Bluetooth: Add flag to track managment controlled adapters This patch adds a HCI_MGMT flag to track adapters which are under the control of the management interface. This is needed to make sure that new kernels will work with old user space versions. I.e. behaviour which could break old user space versions (but is needed by the management interface) should not be exhibited when the HCI_MGMT flag is not set. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/mgmt.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index dbb1e57..5f871b3 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -142,6 +142,8 @@ static int read_index_list(struct sock *sk) hci_del_off_timer(d); + set_bit(HCI_MGMT, &d->flags); + if (test_bit(HCI_SETUP, &d->flags)) continue; @@ -200,6 +202,8 @@ static int read_controller_info(struct sock *sk, unsigned char *data, u16 len) hci_dev_lock_bh(hdev); + set_bit(HCI_MGMT, &hdev->flags); + put_unaligned_le16(hdev->id, &rp->index); rp->type = hdev->dev_type; -- cgit v1.1 From 053f0211d3b1a991f06a7b4aec5b762e42d7c6a4 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 26 Jan 2011 13:07:10 +0200 Subject: Bluetooth: Add send_mode_rsp convenience function for mgmt.c Several management commands have similar responses but they are not always sent asynchronously. To enable synchronous sending (from the management command handler function) a send_mode_rsp function is added. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F.
Padovan --- net/bluetooth/mgmt.c | 50 +++++++++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 5f871b3..13872ae 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -481,6 +481,34 @@ failed: return err; } +static int send_mode_rsp(struct sock *sk, u16 opcode, u16 index, u8 val) +{ + struct mgmt_hdr *hdr; + struct mgmt_ev_cmd_complete *ev; + struct mgmt_mode *rp; + struct sk_buff *skb; + + skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + hdr = (void *) skb_put(skb, sizeof(*hdr)); + hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); + hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(*rp)); + + ev = (void *) skb_put(skb, sizeof(*ev)); + put_unaligned_le16(opcode, &ev->opcode); + + rp = (void *) skb_put(skb, sizeof(*rp)); + put_unaligned_le16(index, &rp->index); + rp->val = val; + + if (sock_queue_rcv_skb(sk, skb) < 0) + kfree_skb(skb); + + return 0; +} + int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) { unsigned char *buf; @@ -594,33 +622,13 @@ struct cmd_lookup { static void mode_rsp(struct pending_cmd *cmd, void *data) { - struct mgmt_hdr *hdr; - struct mgmt_ev_cmd_complete *ev; - struct mgmt_mode *rp; struct mgmt_mode *cp = cmd->cmd; - struct sk_buff *skb; struct cmd_lookup *match = data; if (cp->val != match->val) return; - skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC); - if (!skb) - return; - - hdr = (void *) skb_put(skb, sizeof(*hdr)); - hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); - hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(*rp)); - - ev = (void *) skb_put(skb, sizeof(*ev)); - put_unaligned_le16(cmd->opcode, &ev->opcode); - - rp = (void *) skb_put(skb, sizeof(*rp)); - put_unaligned_le16(cmd->index, &rp->index); - rp->val = cp->val; - - if (sock_queue_rcv_skb(cmd->sk, skb) < 0) - kfree_skb(skb); + send_mode_rsp(cmd->sk, 
cmd->opcode, cmd->index, cp->val); list_del(&cmd->list); -- cgit v1.1 From c542a06c29acbf4ea0024884a198065a10613147 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 26 Jan 2011 13:11:03 +0200 Subject: Bluetooth: Implement set_pairable management command This patch implements a new set_pairable management command to control the pairable state of local adapters. The state is represented using a new HCI_PAIRABLE flag in the hci_dev struct. For backwards compatibility with older user space versions the HCI_PAIRABLE flag gets automatically set when the existence of an adapter is reported to user space through legacy methods and the HCI_MGMT flag is not set. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_core.c | 10 ++++++ net/bluetooth/mgmt.c | 88 +++++++++++++++++++++++++++++++++++------------- 2 files changed, 75 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index dfc4ef9..13eb5a8 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -799,10 +799,17 @@ int hci_get_dev_list(void __user *arg) read_lock_bh(&hci_dev_list_lock); list_for_each(p, &hci_dev_list) { struct hci_dev *hdev; + hdev = list_entry(p, struct hci_dev, list); + hci_del_off_timer(hdev); + + if (!test_bit(HCI_MGMT, &hdev->flags)) + set_bit(HCI_PAIRABLE, &hdev->flags); + (dr + n)->dev_id = hdev->id; (dr + n)->dev_opt = hdev->flags; + if (++n >= dev_num) break; } @@ -832,6 +839,9 @@ int hci_get_dev_info(void __user *arg) hci_del_off_timer(hdev); + if (!test_bit(HCI_MGMT, &hdev->flags)) + set_bit(HCI_PAIRABLE, &hdev->flags); + strcpy(di.name, hdev->name); di.bdaddr = hdev->bdaddr; di.type = (hdev->bus & 0x0f) | (hdev->dev_type << 4); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 13872ae..d107350 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -481,6 +481,29 @@ failed: return err; } +static int mgmt_event(u16 event, void *data, u16 data_len, struct 
sock *skip_sk) +{ + struct sk_buff *skb; + struct mgmt_hdr *hdr; + + skb = alloc_skb(sizeof(*hdr) + data_len, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + bt_cb(skb)->channel = HCI_CHANNEL_CONTROL; + + hdr = (void *) skb_put(skb, sizeof(*hdr)); + hdr->opcode = cpu_to_le16(event); + hdr->len = cpu_to_le16(data_len); + + memcpy(skb_put(skb, data_len), data, data_len); + + hci_send_to_sock(NULL, skb, skip_sk); + kfree_skb(skb); + + return 0; +} + static int send_mode_rsp(struct sock *sk, u16 opcode, u16 index, u8 val) { struct mgmt_hdr *hdr; @@ -509,6 +532,45 @@ static int send_mode_rsp(struct sock *sk, u16 opcode, u16 index, u8 val) return 0; } +static int set_pairable(struct sock *sk, unsigned char *data, u16 len) +{ + struct mgmt_mode *cp, ev; + struct hci_dev *hdev; + u16 dev_id; + int err; + + cp = (void *) data; + dev_id = get_unaligned_le16(&cp->index); + + BT_DBG("request for hci%u", dev_id); + + hdev = hci_dev_get(dev_id); + if (!hdev) + return cmd_status(sk, MGMT_OP_SET_PAIRABLE, ENODEV); + + hci_dev_lock_bh(hdev); + + if (cp->val) + set_bit(HCI_PAIRABLE, &hdev->flags); + else + clear_bit(HCI_PAIRABLE, &hdev->flags); + + err = send_mode_rsp(sk, MGMT_OP_SET_PAIRABLE, dev_id, cp->val); + if (err < 0) + goto failed; + + put_unaligned_le16(dev_id, &ev.index); + ev.val = cp->val; + + err = mgmt_event(MGMT_EV_PAIRABLE, &ev, sizeof(ev), sk); + +failed: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; +} + int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) { unsigned char *buf; @@ -558,6 +620,9 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) case MGMT_OP_SET_CONNECTABLE: err = set_connectable(sk, buf + sizeof(*hdr), len); break; + case MGMT_OP_SET_PAIRABLE: + err = set_pairable(sk, buf + sizeof(*hdr), len); + break; default: BT_DBG("Unknown op %u", opcode); err = cmd_status(sk, opcode, 0x01); @@ -574,29 +639,6 @@ done: return err; } -static int mgmt_event(u16 event, void *data, u16 data_len, struct sock 
*skip_sk) -{ - struct sk_buff *skb; - struct mgmt_hdr *hdr; - - skb = alloc_skb(sizeof(*hdr) + data_len, GFP_ATOMIC); - if (!skb) - return -ENOMEM; - - bt_cb(skb)->channel = HCI_CHANNEL_CONTROL; - - hdr = (void *) skb_put(skb, sizeof(*hdr)); - hdr->opcode = cpu_to_le16(event); - hdr->len = cpu_to_le16(data_len); - - memcpy(skb_put(skb, data_len), data, data_len); - - hci_send_to_sock(NULL, skb, skip_sk); - kfree_skb(skb); - - return 0; -} - int mgmt_index_added(u16 index) { struct mgmt_ev_index_added ev; -- cgit v1.1 From 2aeb9a1ae0e34fb46cb78b82f827a6a54ab65111 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 4 Jan 2011 12:08:51 +0200 Subject: Bluetooth: Implement UUID handling through the management interface This patch adds methods to the management interface for userspace to notify the kernel of which services have been registered for specific adapters. This information is needed for setting the appropriate Class of Device value as well as the Extended Inquiry Response value. This patch doesn't actually implement setting of these values but just provides the storage of the UUIDs so the needed functionality can be built on top of it. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/hci_core.c | 19 ++++++++ net/bluetooth/mgmt.c | 120 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 139 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 13eb5a8..b99248d 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -955,6 +955,22 @@ void hci_del_off_timer(struct hci_dev *hdev) del_timer(&hdev->off_timer); } +int hci_uuids_clear(struct hci_dev *hdev) +{ + struct list_head *p, *n; + + list_for_each_safe(p, n, &hdev->uuids) { + struct bt_uuid *uuid; + + uuid = list_entry(p, struct bt_uuid, list); + + list_del(p); + kfree(uuid); + } + + return 0; +} + /* Register HCI device */ int hci_register_dev(struct hci_dev *hdev) { @@ -1012,6 +1028,8 @@ int hci_register_dev(struct hci_dev *hdev) INIT_LIST_HEAD(&hdev->blacklist); + INIT_LIST_HEAD(&hdev->uuids); + INIT_WORK(&hdev->power_on, hci_power_on); INIT_WORK(&hdev->power_off, hci_power_off); setup_timer(&hdev->off_timer, hci_auto_off, (unsigned long) hdev); @@ -1087,6 +1105,7 @@ int hci_unregister_dev(struct hci_dev *hdev) hci_dev_lock_bh(hdev); hci_blacklist_clear(hdev); + hci_uuids_clear(hdev); hci_dev_unlock_bh(hdev); __hci_dev_put(hdev); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index d107350..0854c2f 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -571,6 +571,120 @@ failed: return err; } +static int uuid_rsp(struct sock *sk, u16 opcode, u16 index) +{ + struct mgmt_hdr *hdr; + struct mgmt_ev_cmd_complete *ev; + struct sk_buff *skb; + + skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(index), GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + hdr = (void *) skb_put(skb, sizeof(*hdr)); + hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); + hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(index)); + + ev = (void *) skb_put(skb, sizeof(*ev)); + put_unaligned_le16(opcode, &ev->opcode); + + put_unaligned_le16(index, skb_put(skb, sizeof(index))); + + if (sock_queue_rcv_skb(sk, skb) 
< 0) + kfree_skb(skb); + + return 0; +} + +static int add_uuid(struct sock *sk, unsigned char *data, u16 len) +{ + struct mgmt_cp_add_uuid *cp; + struct hci_dev *hdev; + struct bt_uuid *uuid; + u16 dev_id; + int err; + + cp = (void *) data; + dev_id = get_unaligned_le16(&cp->index); + + BT_DBG("request for hci%u", dev_id); + + hdev = hci_dev_get(dev_id); + if (!hdev) + return cmd_status(sk, MGMT_OP_ADD_UUID, ENODEV); + + hci_dev_lock_bh(hdev); + + uuid = kmalloc(sizeof(*uuid), GFP_ATOMIC); + if (!uuid) { + err = -ENOMEM; + goto failed; + } + + memcpy(uuid->uuid, cp->uuid, 16); + + list_add(&uuid->list, &hdev->uuids); + + err = uuid_rsp(sk, MGMT_OP_ADD_UUID, dev_id); + +failed: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; +} + +static int remove_uuid(struct sock *sk, unsigned char *data, u16 len) +{ + struct list_head *p, *n; + struct mgmt_cp_add_uuid *cp; + struct hci_dev *hdev; + u8 bt_uuid_any[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + u16 dev_id; + int err, found; + + cp = (void *) data; + dev_id = get_unaligned_le16(&cp->index); + + BT_DBG("request for hci%u", dev_id); + + hdev = hci_dev_get(dev_id); + if (!hdev) + return cmd_status(sk, MGMT_OP_REMOVE_UUID, ENODEV); + + hci_dev_lock_bh(hdev); + + if (memcmp(cp->uuid, bt_uuid_any, 16) == 0) { + err = hci_uuids_clear(hdev); + goto unlock; + } + + found = 0; + + list_for_each_safe(p, n, &hdev->uuids) { + struct bt_uuid *match = list_entry(p, struct bt_uuid, list); + + if (memcmp(match->uuid, cp->uuid, 16) != 0) + continue; + + list_del(&match->list); + found++; + } + + if (found == 0) { + err = cmd_status(sk, MGMT_OP_REMOVE_UUID, ENOENT); + goto unlock; + } + + err = uuid_rsp(sk, MGMT_OP_REMOVE_UUID, dev_id); + +unlock: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; +} + int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) { unsigned char *buf; @@ -623,6 +737,12 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) case 
MGMT_OP_SET_PAIRABLE: err = set_pairable(sk, buf + sizeof(*hdr), len); break; + case MGMT_OP_ADD_UUID: + err = add_uuid(sk, buf + sizeof(*hdr), len); + break; + case MGMT_OP_REMOVE_UUID: + err = remove_uuid(sk, buf + sizeof(*hdr), len); + break; default: BT_DBG("Unknown op %u", opcode); err = cmd_status(sk, opcode, 0x01); -- cgit v1.1 From 930e13363fb0e94db6e8b59c54dfb5c59355113e Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 4 Jan 2011 11:39:44 +0200 Subject: Bluetooth: Implement debugfs support for listing UUIDs This patch adds a debugfs entry to list the UUIDs that have been registered through the management interface. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_sysfs.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 5fce3d6..23471dd 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -461,6 +461,56 @@ static const struct file_operations blacklist_fops = { .llseek = seq_lseek, .release = single_release, }; + +static void print_bt_uuid(struct seq_file *f, u8 *uuid) +{ + u32 data0, data4; + u16 data1, data2, data3, data5; + + memcpy(&data0, &uuid[0], 4); + memcpy(&data1, &uuid[4], 2); + memcpy(&data2, &uuid[6], 2); + memcpy(&data3, &uuid[8], 2); + memcpy(&data4, &uuid[10], 4); + memcpy(&data5, &uuid[14], 2); + + seq_printf(f, "%.8x-%.4x-%.4x-%.4x-%.8x%.4x\n", + ntohl(data0), ntohs(data1), ntohs(data2), + ntohs(data3), ntohl(data4), ntohs(data5)); +} + +static int uuids_show(struct seq_file *f, void *p) +{ + struct hci_dev *hdev = f->private; + struct list_head *l; + + hci_dev_lock_bh(hdev); + + list_for_each(l, &hdev->uuids) { + struct bt_uuid *uuid; + + uuid = list_entry(l, struct bt_uuid, list); + + print_bt_uuid(f, uuid->uuid); + } + + hci_dev_unlock_bh(hdev); + + return 0; +} + +static int uuids_open(struct inode *inode, struct file *file) +{ + return 
single_open(file, uuids_show, inode->i_private); +} + +static const struct file_operations uuids_fops = { + .open = uuids_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + int hci_register_sysfs(struct hci_dev *hdev) { struct device *dev = &hdev->dev; @@ -493,6 +543,8 @@ int hci_register_sysfs(struct hci_dev *hdev) debugfs_create_file("blacklist", 0444, hdev->debugfs, hdev, &blacklist_fops); + debugfs_create_file("uuids", 0444, hdev->debugfs, hdev, &uuids_fops); + return 0; } -- cgit v1.1 From 03b555e119de8288a16e086e1fbd223d9b429d3d Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 4 Jan 2011 15:40:05 +0200 Subject: Bluetooth: Reject pairing requests when in non-pairable mode This patch adds the necessary logic to act accordingly when the HCI_PAIRABLE flag is not set. In that case PIN code replies as well as Secure Simple Pairing requests without a NoBonding requirement need to be rejected. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_event.c | 55 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index a8a38f1..cf3014a 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1595,6 +1595,10 @@ static inline void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff hci_conn_put(conn); } + if (!test_bit(HCI_PAIRABLE, &hdev->flags)) + hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY, + sizeof(ev->bdaddr), &ev->bdaddr); + hci_dev_unlock(hdev); } @@ -1885,9 +1889,52 @@ static inline void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); - if (conn) - hci_conn_hold(conn); + if (!conn) + goto unlock; + + hci_conn_hold(conn); + + if (!test_bit(HCI_MGMT, &hdev->flags)) + goto unlock; + + if (test_bit(HCI_PAIRABLE, &hdev->flags) || + (conn->remote_auth 
& ~0x01) == HCI_AT_NO_BONDING) { + /* FIXME: Do IO capa response based on information + * provided through the management interface */ + } else { + struct hci_cp_io_capability_neg_reply cp; + + bacpy(&cp.bdaddr, &ev->bdaddr); + cp.reason = 0x16; /* Pairing not allowed */ + hci_send_cmd(hdev, HCI_OP_IO_CAPABILITY_NEG_REPLY, + sizeof(cp), &cp); + } + +unlock: + hci_dev_unlock(hdev); +} + +static inline void hci_io_capa_reply_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_io_capa_reply *ev = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + if (!conn) + goto unlock; + + hci_conn_hold(conn); + + conn->remote_cap = ev->capability; + conn->remote_oob = ev->oob_data; + conn->remote_auth = ev->authentication; + +unlock: hci_dev_unlock(hdev); } @@ -2051,6 +2098,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_io_capa_request_evt(hdev, skb); break; + case HCI_EV_IO_CAPA_REPLY: + hci_io_capa_reply_evt(hdev, skb); + break; + case HCI_EV_SIMPLE_PAIR_COMPLETE: hci_simple_pair_complete_evt(hdev, skb); break; -- cgit v1.1 From a5040efa2017f3e4f1b4d5f40fd989567f3994c1 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 10 Jan 2011 13:28:59 +0200 Subject: Bluetooth: Add special handling with __hci_request and HCI_INIT To support a more dynamic HCI initialization sequence the __hci_request behavior requires some more changes. Particularly, the init sequence should be able to have conditionals in it (sending some HCI commands depending on the outcome of a previous command) instead of being a fixed list as it is right now. The reasons for these additional requirements are the moving all previously user space driven initialization commands to the kernel side as well as the support the Low Energy controllers. 
To fulfill these requirements the init sequence is made the only special case for multi-command requests and req_last_cmd is renamed to init_last_cmd. The hci_send_cmd function is changed to update init_last_cmd as long as the HCI_INIT flag is set. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_core.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index b99248d..183ce81 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -97,11 +97,10 @@ void hci_req_complete(struct hci_dev *hdev, __u16 cmd, int result) { BT_DBG("%s command 0x%04x result 0x%2.2x", hdev->name, cmd, result); - /* If the request has set req_last_cmd (typical for multi-HCI - * command requests) check if the completed command matches - * this, and if not just return. Single HCI command requests - * typically leave req_last_cmd as 0 */ - if (hdev->req_last_cmd && cmd != hdev->req_last_cmd) + /* If this is the init phase check if the completed command matches + * the last init command, and if not just return. 
+ */ + if (test_bit(HCI_INIT, &hdev->flags) && hdev->init_last_cmd != cmd) return; if (hdev->req_status == HCI_REQ_PEND) { @@ -158,7 +157,7 @@ static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev, break; } - hdev->req_last_cmd = hdev->req_status = hdev->req_result = 0; + hdev->req_status = hdev->req_result = 0; BT_DBG("%s end: err %d", hdev->name, err); @@ -261,8 +260,6 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) /* Connection accept timeout ~20 secs */ param = cpu_to_le16(0x7d00); hci_send_cmd(hdev, HCI_OP_WRITE_CA_TIMEOUT, 2, &param); - - hdev->req_last_cmd = HCI_OP_WRITE_CA_TIMEOUT; } static void hci_scan_req(struct hci_dev *hdev, unsigned long opt) @@ -523,6 +520,7 @@ int hci_dev_open(__u16 dev) if (!test_bit(HCI_RAW, &hdev->flags)) { atomic_set(&hdev->cmd_cnt, 1); set_bit(HCI_INIT, &hdev->flags); + hdev->init_last_cmd = 0; //__hci_request(hdev, hci_reset_req, 0, HZ); ret = __hci_request(hdev, hci_init_req, 0, @@ -1442,6 +1440,9 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; skb->dev = (void *) hdev; + if (test_bit(HCI_INIT, &hdev->flags)) + hdev->init_last_cmd = opcode; + skb_queue_tail(&hdev->cmd_q, skb); tasklet_schedule(&hdev->cmd_task); -- cgit v1.1 From b0916ea0d9e6ea3ed46bb7a61c13a2b357b0248b Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 10 Jan 2011 13:44:55 +0200 Subject: Bluetooth: Add controller side link key clearing to hci_init_req The controller may have link keys in its own memory and these keys could be used for secure connections. However, since the interface to access these keys doesn't provide information about the key types (which would be needed to infer the level of security each key provides) using these keys is rather useless. Therefore, simply clear the controller side list in the initialization procedure. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/hci_core.c | 5 +++++ net/bluetooth/hci_event.c | 14 ++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 183ce81..cedb8a9 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -190,6 +190,7 @@ static void hci_reset_req(struct hci_dev *hdev, unsigned long opt) static void hci_init_req(struct hci_dev *hdev, unsigned long opt) { + struct hci_cp_delete_stored_link_key cp; struct sk_buff *skb; __le16 param; __u8 flt_type; @@ -260,6 +261,10 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) /* Connection accept timeout ~20 secs */ param = cpu_to_le16(0x7d00); hci_send_cmd(hdev, HCI_OP_WRITE_CA_TIMEOUT, 2, &param); + + bacpy(&cp.bdaddr, BDADDR_ANY); + cp.delete_all = 1; + hci_send_cmd(hdev, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp); } static void hci_scan_req(struct hci_dev *hdev, unsigned long opt) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index cf3014a..49b387c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -557,6 +557,16 @@ static void hci_cc_write_ca_timeout(struct hci_dev *hdev, struct sk_buff *skb) hci_req_complete(hdev, HCI_OP_WRITE_CA_TIMEOUT, status); } +static void hci_cc_delete_stored_link_key(struct hci_dev *hdev, + struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + + BT_DBG("%s status 0x%x", hdev->name, status); + + hci_req_complete(hdev, HCI_OP_DELETE_STORED_LINK_KEY, status); +} + static inline void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) { BT_DBG("%s status 0x%x", hdev->name, status); @@ -1402,6 +1412,10 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk hci_cc_write_ca_timeout(hdev, skb); break; + case HCI_OP_DELETE_STORED_LINK_KEY: + hci_cc_delete_stored_link_key(hdev, skb); + break; + default: BT_DBG("%s opcode 0x%x", hdev->name, opcode); break; -- cgit v1.1 From 
d83506003608910d24d5ace9ec06ad1bfd9ad110 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 10 Jan 2011 14:28:45 +0200 Subject: Bluetooth: Remove page timeout setting from HCI init sequence User space should set the page timeout so there's no need to explicitly set it in the HCI init sequence. Even if user space fails to set it the controller default value will be used. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_core.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index cedb8a9..748f5a6 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -254,10 +254,6 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) flt_type = HCI_FLT_CLEAR_ALL; hci_send_cmd(hdev, HCI_OP_SET_EVENT_FLT, 1, &flt_type); - /* Page timeout ~20 secs */ - param = cpu_to_le16(0x8000); - hci_send_cmd(hdev, HCI_OP_WRITE_PG_TIMEOUT, 2, &param); - /* Connection accept timeout ~20 secs */ param = cpu_to_le16(0x7d00); hci_send_cmd(hdev, HCI_OP_WRITE_CA_TIMEOUT, 2, &param); -- cgit v1.1 From d5859e22cd40b73164b3e5d8d5d796f96edcc6af Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 25 Jan 2011 01:19:58 +0200 Subject: Bluetooth: Implement a more complete adapter initialization sequence Using the management interface means that user space doesn't need to do any HCI command sending at all. This patch moves the remaining initialization commands from user space to the kernel side. The patch makes use of the new feature of __hci_request which allows the request to be dynamically modified while it is ongoing (something that is needed to react appropriately to the local features and the version of the adapter). Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/hci_event.c | 194 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 193 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 49b387c..c69ee44 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -424,6 +424,115 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb) hdev->ssp_mode = *((__u8 *) sent); } +static u8 hci_get_inquiry_mode(struct hci_dev *hdev) +{ + if (hdev->features[6] & LMP_EXT_INQ) + return 2; + + if (hdev->features[3] & LMP_RSSI_INQ) + return 1; + + if (hdev->manufacturer == 11 && hdev->hci_rev == 0x00 && + hdev->lmp_subver == 0x0757) + return 1; + + if (hdev->manufacturer == 15) { + if (hdev->hci_rev == 0x03 && hdev->lmp_subver == 0x6963) + return 1; + if (hdev->hci_rev == 0x09 && hdev->lmp_subver == 0x6963) + return 1; + if (hdev->hci_rev == 0x00 && hdev->lmp_subver == 0x6965) + return 1; + } + + if (hdev->manufacturer == 31 && hdev->hci_rev == 0x2005 && + hdev->lmp_subver == 0x1805) + return 1; + + return 0; +} + +static void hci_setup_inquiry_mode(struct hci_dev *hdev) +{ + u8 mode; + + mode = hci_get_inquiry_mode(hdev); + + hci_send_cmd(hdev, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode); +} + +static void hci_setup_event_mask(struct hci_dev *hdev) +{ + /* The second byte is 0xff instead of 0x9f (two reserved bits + * disabled) since a Broadcom 1.2 dongle doesn't respond to the + * command otherwise */ + u8 events[8] = { 0xff, 0xff, 0xfb, 0xff, 0x00, 0x00, 0x00, 0x00 }; + + /* Events for 1.2 and newer controllers */ + if (hdev->lmp_ver > 1) { + events[4] |= 0x01; /* Flow Specification Complete */ + events[4] |= 0x02; /* Inquiry Result with RSSI */ + events[4] |= 0x04; /* Read Remote Extended Features Complete */ + events[5] |= 0x08; /* Synchronous Connection Complete */ + events[5] |= 0x10; /* Synchronous Connection Changed */ + } + + if (hdev->features[3] & LMP_RSSI_INQ) + events[4] |= 0x04; /* Inquiry 
Result with RSSI */ + + if (hdev->features[5] & LMP_SNIFF_SUBR) + events[5] |= 0x20; /* Sniff Subrating */ + + if (hdev->features[5] & LMP_PAUSE_ENC) + events[5] |= 0x80; /* Encryption Key Refresh Complete */ + + if (hdev->features[6] & LMP_EXT_INQ) + events[5] |= 0x40; /* Extended Inquiry Result */ + + if (hdev->features[6] & LMP_NO_FLUSH) + events[7] |= 0x01; /* Enhanced Flush Complete */ + + if (hdev->features[7] & LMP_LSTO) + events[6] |= 0x80; /* Link Supervision Timeout Changed */ + + if (hdev->features[6] & LMP_SIMPLE_PAIR) { + events[6] |= 0x01; /* IO Capability Request */ + events[6] |= 0x02; /* IO Capability Response */ + events[6] |= 0x04; /* User Confirmation Request */ + events[6] |= 0x08; /* User Passkey Request */ + events[6] |= 0x10; /* Remote OOB Data Request */ + events[6] |= 0x20; /* Simple Pairing Complete */ + events[7] |= 0x04; /* User Passkey Notification */ + events[7] |= 0x08; /* Keypress Notification */ + events[7] |= 0x10; /* Remote Host Supported + * Features Notification */ + } + + if (hdev->features[4] & LMP_LE) + events[7] |= 0x20; /* LE Meta-Event */ + + hci_send_cmd(hdev, HCI_OP_SET_EVENT_MASK, sizeof(events), events); +} + +static void hci_setup(struct hci_dev *hdev) +{ + hci_setup_event_mask(hdev); + + if (hdev->lmp_ver > 1) + hci_send_cmd(hdev, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); + + if (hdev->features[6] & LMP_SIMPLE_PAIR) { + u8 mode = 0x01; + hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode); + } + + if (hdev->features[3] & LMP_RSSI_INQ) + hci_setup_inquiry_mode(hdev); + + if (hdev->features[7] & LMP_INQ_TX_PWR) + hci_send_cmd(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL); +} + static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_rp_read_local_version *rp = (void *) skb->data; @@ -435,11 +544,34 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb) hdev->hci_ver = rp->hci_ver; hdev->hci_rev = __le16_to_cpu(rp->hci_rev); + hdev->lmp_ver = 
rp->lmp_ver; hdev->manufacturer = __le16_to_cpu(rp->manufacturer); + hdev->lmp_subver = __le16_to_cpu(rp->lmp_subver); BT_DBG("%s manufacturer %d hci ver %d:%d", hdev->name, hdev->manufacturer, hdev->hci_ver, hdev->hci_rev); + + if (test_bit(HCI_INIT, &hdev->flags)) + hci_setup(hdev); +} + +static void hci_setup_link_policy(struct hci_dev *hdev) +{ + u16 link_policy = 0; + + if (hdev->features[0] & LMP_RSWITCH) + link_policy |= HCI_LP_RSWITCH; + if (hdev->features[0] & LMP_HOLD) + link_policy |= HCI_LP_HOLD; + if (hdev->features[0] & LMP_SNIFF) + link_policy |= HCI_LP_SNIFF; + if (hdev->features[1] & LMP_PARK) + link_policy |= HCI_LP_PARK; + + link_policy = cpu_to_le16(link_policy); + hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, + sizeof(link_policy), &link_policy); } static void hci_cc_read_local_commands(struct hci_dev *hdev, struct sk_buff *skb) @@ -449,9 +581,15 @@ static void hci_cc_read_local_commands(struct hci_dev *hdev, struct sk_buff *skb BT_DBG("%s status 0x%x", hdev->name, rp->status); if (rp->status) - return; + goto done; memcpy(hdev->commands, rp->commands, sizeof(hdev->commands)); + + if (test_bit(HCI_INIT, &hdev->flags) && (hdev->commands[5] & 0x10)) + hci_setup_link_policy(hdev); + +done: + hci_req_complete(hdev, HCI_OP_READ_LOCAL_COMMANDS, rp->status); } static void hci_cc_read_local_features(struct hci_dev *hdev, struct sk_buff *skb) @@ -567,6 +705,44 @@ static void hci_cc_delete_stored_link_key(struct hci_dev *hdev, hci_req_complete(hdev, HCI_OP_DELETE_STORED_LINK_KEY, status); } +static void hci_cc_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + + BT_DBG("%s status 0x%x", hdev->name, status); + + hci_req_complete(hdev, HCI_OP_SET_EVENT_MASK, status); +} + +static void hci_cc_write_inquiry_mode(struct hci_dev *hdev, + struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + + BT_DBG("%s status 0x%x", hdev->name, status); + + hci_req_complete(hdev, HCI_OP_WRITE_INQUIRY_MODE, 
status); +} + +static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev, + struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + + BT_DBG("%s status 0x%x", hdev->name, status); + + hci_req_complete(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, status); +} + +static void hci_cc_set_event_flt(struct hci_dev *hdev, struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + + BT_DBG("%s status 0x%x", hdev->name, status); + + hci_req_complete(hdev, HCI_OP_SET_EVENT_FLT, status); +} + static inline void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) { BT_DBG("%s status 0x%x", hdev->name, status); @@ -1416,6 +1592,22 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk hci_cc_delete_stored_link_key(hdev, skb); break; + case HCI_OP_SET_EVENT_MASK: + hci_cc_set_event_mask(hdev, skb); + break; + + case HCI_OP_WRITE_INQUIRY_MODE: + hci_cc_write_inquiry_mode(hdev, skb); + break; + + case HCI_OP_READ_INQ_RSP_TX_POWER: + hci_cc_read_inq_rsp_tx_power(hdev, skb); + break; + + case HCI_OP_SET_EVENT_FLT: + hci_cc_set_event_flt(hdev, skb); + break; + default: BT_DBG("%s opcode 0x%x", hdev->name, opcode); break; -- cgit v1.1 From 1aff6f09491f454d4cd9f405c783fa5e9d3168a0 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 13 Jan 2011 21:56:52 +0200 Subject: Bluetooth: Add class of device control to the management interface This patch adds the possibility for user space to fully control the Class of Device value of local adapters. To control the service class bits each UUID that's added comes with a service class "hint" which acts as a mask of bits that the UUID needs to have enabled. The set_service_cache management command is used to make sure we queue up all UUID changes as user space initializes its drivers and then send a single HCI_Write_Class_of_Device command when initialization is complete. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/mgmt.c | 121 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 118 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 0854c2f..a08f4ce 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -571,7 +571,7 @@ failed: return err; } -static int uuid_rsp(struct sock *sk, u16 opcode, u16 index) +static int index_rsp(struct sock *sk, u16 opcode, u16 index) { struct mgmt_hdr *hdr; struct mgmt_ev_cmd_complete *ev; @@ -596,6 +596,39 @@ static int uuid_rsp(struct sock *sk, u16 opcode, u16 index) return 0; } +static u8 get_service_classes(struct hci_dev *hdev) +{ + struct list_head *p; + u8 val = 0; + + list_for_each(p, &hdev->uuids) { + struct bt_uuid *uuid = list_entry(p, struct bt_uuid, list); + + val |= uuid->svc_hint; + } + + return val; +} + +static int update_class(struct hci_dev *hdev) +{ + u8 cod[3]; + + BT_DBG("%s", hdev->name); + + if (test_bit(HCI_SERVICE_CACHE, &hdev->flags)) + return 0; + + cod[0] = hdev->minor_class; + cod[1] = hdev->major_class; + cod[2] = get_service_classes(hdev); + + if (memcmp(cod, hdev->dev_class, 3) == 0) + return 0; + + return hci_send_cmd(hdev, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); +} + static int add_uuid(struct sock *sk, unsigned char *data, u16 len) { struct mgmt_cp_add_uuid *cp; @@ -622,10 +655,15 @@ static int add_uuid(struct sock *sk, unsigned char *data, u16 len) } memcpy(uuid->uuid, cp->uuid, 16); + uuid->svc_hint = cp->svc_hint; list_add(&uuid->list, &hdev->uuids); - err = uuid_rsp(sk, MGMT_OP_ADD_UUID, dev_id); + err = update_class(hdev); + if (err < 0) + goto failed; + + err = index_rsp(sk, MGMT_OP_ADD_UUID, dev_id); failed: hci_dev_unlock_bh(hdev); @@ -676,7 +714,11 @@ static int remove_uuid(struct sock *sk, unsigned char *data, u16 len) goto unlock; } - err = uuid_rsp(sk, MGMT_OP_REMOVE_UUID, dev_id); + err = update_class(hdev); + if (err < 0) + goto unlock; + + err = index_rsp(sk, MGMT_OP_REMOVE_UUID, 
dev_id); unlock: hci_dev_unlock_bh(hdev); @@ -685,6 +727,73 @@ unlock: return err; } +static int set_dev_class(struct sock *sk, unsigned char *data, u16 len) +{ + struct hci_dev *hdev; + struct mgmt_cp_set_dev_class *cp; + u16 dev_id; + int err; + + cp = (void *) data; + dev_id = get_unaligned_le16(&cp->index); + + BT_DBG("request for hci%u", dev_id); + + hdev = hci_dev_get(dev_id); + if (!hdev) + return cmd_status(sk, MGMT_OP_SET_DEV_CLASS, ENODEV); + + hci_dev_lock_bh(hdev); + + hdev->major_class = cp->major; + hdev->minor_class = cp->minor; + + err = update_class(hdev); + + if (err == 0) + err = index_rsp(sk, MGMT_OP_SET_DEV_CLASS, dev_id); + + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; +} + +static int set_service_cache(struct sock *sk, unsigned char *data, u16 len) +{ + struct hci_dev *hdev; + struct mgmt_cp_set_service_cache *cp; + u16 dev_id; + int err; + + cp = (void *) data; + dev_id = get_unaligned_le16(&cp->index); + + hdev = hci_dev_get(dev_id); + if (!hdev) + return cmd_status(sk, MGMT_OP_SET_SERVICE_CACHE, ENODEV); + + hci_dev_lock_bh(hdev); + + BT_DBG("hci%u enable %d", dev_id, cp->enable); + + if (cp->enable) { + set_bit(HCI_SERVICE_CACHE, &hdev->flags); + err = 0; + } else { + clear_bit(HCI_SERVICE_CACHE, &hdev->flags); + err = update_class(hdev); + } + + if (err == 0) + err = index_rsp(sk, MGMT_OP_SET_SERVICE_CACHE, dev_id); + + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; +} + int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) { unsigned char *buf; @@ -743,6 +852,12 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) case MGMT_OP_REMOVE_UUID: err = remove_uuid(sk, buf + sizeof(*hdr), len); break; + case MGMT_OP_SET_DEV_CLASS: + err = set_dev_class(sk, buf + sizeof(*hdr), len); + break; + case MGMT_OP_SET_SERVICE_CACHE: + err = set_service_cache(sk, buf + sizeof(*hdr), len); + break; default: BT_DBG("Unknown op %u", opcode); err = cmd_status(sk, opcode, 0x01); -- cgit v1.1 
From 55ed8ca10f3530de8edbbf138acb50992bf5005b Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 17 Jan 2011 14:41:05 +0200 Subject: Bluetooth: Implement link key handling for the management interface This patch adds management commands to feed the kernel with all stored link keys as well as remove specific ones or all of them. Once the load_keys command has been called the kernel takes over link key replies. A new_key event is also added to inform userspace of newly created link keys that should be stored permanently. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_core.c | 85 +++++++++++++++++++++++++++++++++ net/bluetooth/hci_event.c | 51 ++++++++++++++++++++ net/bluetooth/mgmt.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 252 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 748f5a6..8ca8cf1 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -970,6 +970,88 @@ int hci_uuids_clear(struct hci_dev *hdev) return 0; } +int hci_link_keys_clear(struct hci_dev *hdev) +{ + struct list_head *p, *n; + + list_for_each_safe(p, n, &hdev->link_keys) { + struct link_key *key; + + key = list_entry(p, struct link_key, list); + + list_del(p); + kfree(key); + } + + return 0; +} + +struct link_key *hci_find_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr) +{ + struct list_head *p; + + list_for_each(p, &hdev->link_keys) { + struct link_key *k; + + k = list_entry(p, struct link_key, list); + + if (bacmp(bdaddr, &k->bdaddr) == 0) + return k; + } + + return NULL; +} + +int hci_add_link_key(struct hci_dev *hdev, int new_key, bdaddr_t *bdaddr, + u8 *val, u8 type, u8 pin_len) +{ + struct link_key *key, *old_key; + u8 old_key_type; + + old_key = hci_find_link_key(hdev, bdaddr); + if (old_key) { + old_key_type = old_key->type; + key = old_key; + } else { + old_key_type = 0xff; + key = kzalloc(sizeof(*key), GFP_ATOMIC); + if (!key) + return -ENOMEM; 
+ list_add(&key->list, &hdev->link_keys); + } + + BT_DBG("%s key for %s type %u", hdev->name, batostr(bdaddr), type); + + bacpy(&key->bdaddr, bdaddr); + memcpy(key->val, val, 16); + key->type = type; + key->pin_len = pin_len; + + if (new_key) + mgmt_new_key(hdev->id, key, old_key_type); + + if (type == 0x06) + key->type = old_key_type; + + return 0; +} + +int hci_remove_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr) +{ + struct link_key *key; + + key = hci_find_link_key(hdev, bdaddr); + if (!key) + return -ENOENT; + + BT_DBG("%s removing %s", hdev->name, batostr(bdaddr)); + + list_del(&key->list); + kfree(key); + + return 0; +} + /* Register HCI device */ int hci_register_dev(struct hci_dev *hdev) { @@ -1029,6 +1111,8 @@ int hci_register_dev(struct hci_dev *hdev) INIT_LIST_HEAD(&hdev->uuids); + INIT_LIST_HEAD(&hdev->link_keys); + INIT_WORK(&hdev->power_on, hci_power_on); INIT_WORK(&hdev->power_off, hci_power_off); setup_timer(&hdev->off_timer, hci_auto_off, (unsigned long) hdev); @@ -1105,6 +1189,7 @@ int hci_unregister_dev(struct hci_dev *hdev) hci_dev_lock_bh(hdev); hci_blacklist_clear(hdev); hci_uuids_clear(hdev); + hci_link_keys_clear(hdev); hci_dev_unlock_bh(hdev); __hci_dev_put(hdev); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index c69ee44..80ffd3a 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1810,13 +1810,60 @@ static inline void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { + struct hci_ev_link_key_req *ev = (void *) skb->data; + struct hci_cp_link_key_reply cp; + struct hci_conn *conn; + struct link_key *key; + BT_DBG("%s", hdev->name); + + if (!test_bit(HCI_LINK_KEYS, &hdev->flags)) + return; + + hci_dev_lock(hdev); + + key = hci_find_link_key(hdev, &ev->bdaddr); + if (!key) { + BT_DBG("%s link key not found for %s", hdev->name, + batostr(&ev->bdaddr)); + goto not_found; + } + + BT_DBG("%s found 
key type %u for %s", hdev->name, key->type, + batostr(&ev->bdaddr)); + + if (!test_bit(HCI_DEBUG_KEYS, &hdev->flags) && key->type == 0x03) { + BT_DBG("%s ignoring debug key", hdev->name); + goto not_found; + } + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + + if (key->type == 0x04 && conn && conn->auth_type != 0xff && + (conn->auth_type & 0x01)) { + BT_DBG("%s ignoring unauthenticated key", hdev->name); + goto not_found; + } + + bacpy(&cp.bdaddr, &ev->bdaddr); + memcpy(cp.link_key, key->val, 16); + + hci_send_cmd(hdev, HCI_OP_LINK_KEY_REPLY, sizeof(cp), &cp); + + hci_dev_unlock(hdev); + + return; + +not_found: + hci_send_cmd(hdev, HCI_OP_LINK_KEY_NEG_REPLY, 6, &ev->bdaddr); + hci_dev_unlock(hdev); } static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_link_key_notify *ev = (void *) skb->data; struct hci_conn *conn; + u8 pin_len = 0; BT_DBG("%s", hdev->name); @@ -1829,6 +1876,10 @@ static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff hci_conn_put(conn); } + if (test_bit(HCI_LINK_KEYS, &hdev->flags)) + hci_add_link_key(hdev, 1, &ev->bdaddr, ev->link_key, + ev->key_type, pin_len); + hci_dev_unlock(hdev); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index a08f4ce..bdb0e85 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -794,6 +794,99 @@ static int set_service_cache(struct sock *sk, unsigned char *data, u16 len) return err; } +static int load_keys(struct sock *sk, unsigned char *data, u16 len) +{ + struct hci_dev *hdev; + struct mgmt_cp_load_keys *cp; + u16 dev_id, key_count, expected_len; + int i; + + cp = (void *) data; + dev_id = get_unaligned_le16(&cp->index); + key_count = get_unaligned_le16(&cp->key_count); + + expected_len = sizeof(*cp) + key_count * sizeof(struct mgmt_key_info); + if (expected_len != len) { + BT_ERR("load_keys: expected %u bytes, got %u bytes", + len, expected_len); + return -EINVAL; + } + + hdev = hci_dev_get(dev_id); + 
if (!hdev) + return cmd_status(sk, MGMT_OP_LOAD_KEYS, ENODEV); + + BT_DBG("hci%u debug_keys %u key_count %u", dev_id, cp->debug_keys, + key_count); + + hci_dev_lock_bh(hdev); + + hci_link_keys_clear(hdev); + + set_bit(HCI_LINK_KEYS, &hdev->flags); + + if (cp->debug_keys) + set_bit(HCI_DEBUG_KEYS, &hdev->flags); + else + clear_bit(HCI_DEBUG_KEYS, &hdev->flags); + + for (i = 0; i < key_count; i++) { + struct mgmt_key_info *key = &cp->keys[i]; + + hci_add_link_key(hdev, 0, &key->bdaddr, key->val, key->type, + key->pin_len); + } + + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return 0; +} + +static int remove_key(struct sock *sk, unsigned char *data, u16 len) +{ + struct hci_dev *hdev; + struct mgmt_cp_remove_key *cp; + struct hci_conn *conn; + u16 dev_id; + int err; + + cp = (void *) data; + dev_id = get_unaligned_le16(&cp->index); + + hdev = hci_dev_get(dev_id); + if (!hdev) + return cmd_status(sk, MGMT_OP_REMOVE_KEY, ENODEV); + + hci_dev_lock_bh(hdev); + + err = hci_remove_link_key(hdev, &cp->bdaddr); + if (err < 0) { + err = cmd_status(sk, MGMT_OP_REMOVE_KEY, -err); + goto unlock; + } + + err = 0; + + if (!test_bit(HCI_UP, &hdev->flags) || !cp->disconnect) + goto unlock; + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr); + if (conn) { + struct hci_cp_disconnect dc; + + put_unaligned_le16(conn->handle, &dc.handle); + dc.reason = 0x13; /* Remote User Terminated Connection */ + err = hci_send_cmd(hdev, HCI_OP_DISCONNECT, 0, NULL); + } + +unlock: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; +} + int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) { unsigned char *buf; @@ -858,6 +951,12 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) case MGMT_OP_SET_SERVICE_CACHE: err = set_service_cache(sk, buf + sizeof(*hdr), len); break; + case MGMT_OP_LOAD_KEYS: + err = load_keys(sk, buf + sizeof(*hdr), len); + break; + case MGMT_OP_REMOVE_KEY: + err = remove_key(sk, buf + sizeof(*hdr), len); + break; 
default: BT_DBG("Unknown op %u", opcode); err = cmd_status(sk, opcode, 0x01); @@ -974,3 +1073,20 @@ int mgmt_connectable(u16 index, u8 connectable) return ret; } + +int mgmt_new_key(u16 index, struct link_key *key, u8 old_key_type) +{ + struct mgmt_ev_new_key ev; + + memset(&ev, 0, sizeof(ev)); + + put_unaligned_le16(index, &ev.index); + + bacpy(&ev.key.bdaddr, &key->bdaddr); + ev.key.type = key->type; + memcpy(ev.key.val, key->val, 16); + ev.key.pin_len = key->pin_len; + ev.old_key_type = old_key_type; + + return mgmt_event(MGMT_EV_NEW_KEY, &ev, sizeof(ev), NULL); +} -- cgit v1.1 From f7520543ab40341edbc2aeee7fef68218be19a0a Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 20 Jan 2011 12:34:39 +0200 Subject: Bluetooth: Add connected/disconnected management events This patch adds connected and disconnected management events to track the connection status to remote devices. The events map directly to successful connection complete and disconnection complete HCI events for ACL links. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/hci_event.c | 16 +++++++++++----- net/bluetooth/mgmt.c | 20 ++++++++++++++++++++ 2 files changed, 31 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 80ffd3a..46ddb02 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1137,6 +1137,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s conn->state = BT_CONFIG; hci_conn_hold(conn); conn->disc_timeout = HCI_DISCONN_TIMEOUT; + mgmt_connected(hdev->id, &ev->bdaddr); } else conn->state = BT_CONNECTED; @@ -1269,13 +1270,18 @@ static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff hci_dev_lock(hdev); conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); - if (conn) { - conn->state = BT_CLOSED; + if (!conn) + goto unlock; - hci_proto_disconn_cfm(conn, ev->reason); - hci_conn_del(conn); - } + conn->state = BT_CLOSED; + + if (conn->type == ACL_LINK) + mgmt_disconnected(hdev->id, &conn->dst); + hci_proto_disconn_cfm(conn, ev->reason); + hci_conn_del(conn); + +unlock: hci_dev_unlock(hdev); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index bdb0e85..7cf1968 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1090,3 +1090,23 @@ int mgmt_new_key(u16 index, struct link_key *key, u8 old_key_type) return mgmt_event(MGMT_EV_NEW_KEY, &ev, sizeof(ev), NULL); } + +int mgmt_connected(u16 index, bdaddr_t *bdaddr) +{ + struct mgmt_ev_connected ev; + + put_unaligned_le16(index, &ev.index); + bacpy(&ev.bdaddr, bdaddr); + + return mgmt_event(MGMT_EV_CONNECTED, &ev, sizeof(ev), NULL); +} + +int mgmt_disconnected(u16 index, bdaddr_t *bdaddr) +{ + struct mgmt_ev_disconnected ev; + + put_unaligned_le16(index, &ev.index); + bacpy(&ev.bdaddr, bdaddr); + + return mgmt_event(MGMT_EV_DISCONNECTED, &ev, sizeof(ev), NULL); +} -- cgit v1.1 From 8962ee74be48df16027100f657b2b12e8ef3d34d Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: 
Thu, 20 Jan 2011 12:40:27 +0200 Subject: Bluetooth: Add disconnect management command This patch adds a disconnect command to the management interface. Using this command user space is able to force the disconnection of connected devices. The command maps directly to the Disconnect HCI command. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_event.c | 9 +++- net/bluetooth/mgmt.c | 119 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 126 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 46ddb02..335c60b 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1264,8 +1264,10 @@ static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff BT_DBG("%s status %d", hdev->name, ev->status); - if (ev->status) + if (ev->status) { + mgmt_disconnect_failed(hdev->id); return; + } hci_dev_lock(hdev); @@ -1680,6 +1682,11 @@ static inline void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cs_exit_sniff_mode(hdev, ev->status); break; + case HCI_OP_DISCONNECT: + if (ev->status != 0) + mgmt_disconnect_failed(hdev->id); + break; + default: BT_DBG("%s opcode 0x%x", hdev->name, opcode); break; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7cf1968..48f266a 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -887,6 +887,60 @@ unlock: return err; } +static int disconnect(struct sock *sk, unsigned char *data, u16 len) +{ + struct hci_dev *hdev; + struct mgmt_cp_disconnect *cp; + struct hci_cp_disconnect dc; + struct hci_conn *conn; + u16 dev_id; + int err; + + BT_DBG(""); + + cp = (void *) data; + dev_id = get_unaligned_le16(&cp->index); + + hdev = hci_dev_get(dev_id); + if (!hdev) + return cmd_status(sk, MGMT_OP_DISCONNECT, ENODEV); + + hci_dev_lock_bh(hdev); + + if (!test_bit(HCI_UP, &hdev->flags)) { + err = cmd_status(sk, MGMT_OP_DISCONNECT, ENETDOWN); + goto failed; + } + + if 
(mgmt_pending_find(MGMT_OP_DISCONNECT, dev_id)) { + err = cmd_status(sk, MGMT_OP_DISCONNECT, EBUSY); + goto failed; + } + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr); + if (!conn) { + err = cmd_status(sk, MGMT_OP_DISCONNECT, ENOTCONN); + goto failed; + } + + err = mgmt_pending_add(sk, MGMT_OP_DISCONNECT, dev_id, data, len); + if (err < 0) + goto failed; + + put_unaligned_le16(conn->handle, &dc.handle); + dc.reason = 0x13; /* Remote User Terminated Connection */ + + err = hci_send_cmd(hdev, HCI_OP_DISCONNECT, sizeof(dc), &dc); + if (err < 0) + mgmt_pending_remove(MGMT_OP_DISCONNECT, dev_id); + +failed: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; +} + int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) { unsigned char *buf; @@ -957,6 +1011,9 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) case MGMT_OP_REMOVE_KEY: err = remove_key(sk, buf + sizeof(*hdr), len); break; + case MGMT_OP_DISCONNECT: + err = disconnect(sk, buf + sizeof(*hdr), len); + break; default: BT_DBG("Unknown op %u", opcode); err = cmd_status(sk, opcode, 0x01); @@ -1101,12 +1158,72 @@ int mgmt_connected(u16 index, bdaddr_t *bdaddr) return mgmt_event(MGMT_EV_CONNECTED, &ev, sizeof(ev), NULL); } +static void disconnect_rsp(struct pending_cmd *cmd, void *data) +{ + struct mgmt_cp_disconnect *cp = cmd->cmd; + struct sock **sk = data; + struct sk_buff *skb; + struct mgmt_hdr *hdr; + struct mgmt_ev_cmd_complete *ev; + struct mgmt_rp_disconnect *rp; + + skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC); + if (!skb) + return; + + hdr = (void *) skb_put(skb, sizeof(*hdr)); + hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); + hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(*rp)); + + ev = (void *) skb_put(skb, sizeof(*ev)); + put_unaligned_le16(MGMT_OP_DISCONNECT, &ev->opcode); + + rp = (void *) skb_put(skb, sizeof(*rp)); + put_unaligned_le16(cmd->index, &rp->index); + bacpy(&rp->bdaddr, &cp->bdaddr); + + if 
(sock_queue_rcv_skb(cmd->sk, skb) < 0) + kfree_skb(skb); + + *sk = cmd->sk; + sock_hold(*sk); + + list_del(&cmd->list); + mgmt_pending_free(cmd); +} + int mgmt_disconnected(u16 index, bdaddr_t *bdaddr) { struct mgmt_ev_disconnected ev; + struct sock *sk = NULL; + int err; + + mgmt_pending_foreach(MGMT_OP_DISCONNECT, index, disconnect_rsp, &sk); put_unaligned_le16(index, &ev.index); bacpy(&ev.bdaddr, bdaddr); - return mgmt_event(MGMT_EV_DISCONNECTED, &ev, sizeof(ev), NULL); + err = mgmt_event(MGMT_EV_DISCONNECTED, &ev, sizeof(ev), sk); + + if (sk) + sock_put(sk); + + return err; +} + +int mgmt_disconnect_failed(u16 index) +{ + struct pending_cmd *cmd; + int err; + + cmd = mgmt_pending_find(MGMT_OP_DISCONNECT, index); + if (!cmd) + return -ENOENT; + + err = cmd_status(cmd->sk, MGMT_OP_DISCONNECT, EIO); + + list_del(&cmd->list); + mgmt_pending_free(cmd); + + return err; } -- cgit v1.1 From 17d5c04cb597418a177c3ca18dfde679636dd51c Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sat, 22 Jan 2011 06:09:08 +0200 Subject: Bluetooth: Add support for connect failed management event This patch adds a new connect failed management event to track failures in connecting to remote devices. It is particularly useful for security mode 3 scenarios when we don't have a connected state while pairing but still need to detect when the connect attempt failed. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/hci_event.c | 5 ++++- net/bluetooth/mgmt.c | 11 +++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 335c60b..995ae6c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1166,8 +1166,11 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s hci_send_cmd(hdev, HCI_OP_CHANGE_CONN_PTYPE, sizeof(cp), &cp); } - } else + } else { conn->state = BT_CLOSED; + if (conn->type == ACL_LINK) + mgmt_connect_failed(hdev->id, &ev->bdaddr, ev->status); + } if (conn->type == ACL_LINK) hci_sco_setup(conn, ev->status); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 48f266a..9fb989f 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1227,3 +1227,14 @@ int mgmt_disconnect_failed(u16 index) return err; } + +int mgmt_connect_failed(u16 index, bdaddr_t *bdaddr, u8 status) +{ + struct mgmt_ev_connect_failed ev; + + put_unaligned_le16(index, &ev.index); + bacpy(&ev.bdaddr, bdaddr); + ev.status = status; + + return mgmt_event(MGMT_EV_CONNECT_FAILED, &ev, sizeof(ev), NULL); +} -- cgit v1.1 From 2784eb41b1fbb3ff80f4921fe9dbb4c4acb6dc24 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 21 Jan 2011 13:56:35 +0200 Subject: Bluetooth: Add get_connections management interface command This patch adds a get_connections command to the management interface. With this command userspace can get the current list of connected devices. Typically this command would only be used once when enumerating existing adapters. After that the connected and disconnected events are used to track connections. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/mgmt.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 9fb989f..8f4f47e 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -941,6 +941,75 @@ failed: return err; } +static int get_connections(struct sock *sk, unsigned char *data, u16 len) +{ + struct sk_buff *skb; + struct mgmt_hdr *hdr; + struct mgmt_cp_get_connections *cp; + struct mgmt_ev_cmd_complete *ev; + struct mgmt_rp_get_connections *rp; + struct hci_dev *hdev; + struct list_head *p; + size_t body_len; + u16 dev_id, count; + int i, err; + + BT_DBG(""); + + cp = (void *) data; + dev_id = get_unaligned_le16(&cp->index); + + hdev = hci_dev_get(dev_id); + if (!hdev) + return cmd_status(sk, MGMT_OP_GET_CONNECTIONS, ENODEV); + + hci_dev_lock_bh(hdev); + + count = 0; + list_for_each(p, &hdev->conn_hash.list) { + count++; + } + + body_len = sizeof(*ev) + sizeof(*rp) + (count * sizeof(bdaddr_t)); + skb = alloc_skb(sizeof(*hdr) + body_len, GFP_ATOMIC); + if (!skb) { + err = -ENOMEM; + goto unlock; + } + + hdr = (void *) skb_put(skb, sizeof(*hdr)); + hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); + hdr->len = cpu_to_le16(body_len); + + ev = (void *) skb_put(skb, sizeof(*ev)); + put_unaligned_le16(MGMT_OP_GET_CONNECTIONS, &ev->opcode); + + rp = (void *) skb_put(skb, sizeof(*rp) + (count * sizeof(bdaddr_t))); + put_unaligned_le16(dev_id, &rp->index); + put_unaligned_le16(count, &rp->conn_count); + + read_lock(&hci_dev_list_lock); + + i = 0; + list_for_each(p, &hdev->conn_hash.list) { + struct hci_conn *c = list_entry(p, struct hci_conn, list); + + bacpy(&rp->conn[i++], &c->dst); + } + + read_unlock(&hci_dev_list_lock); + + if (sock_queue_rcv_skb(sk, skb) < 0) + kfree_skb(skb); + + err = 0; + +unlock: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + return err; +} + int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) { unsigned char *buf; 
@@ -1014,6 +1083,9 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) case MGMT_OP_DISCONNECT: err = disconnect(sk, buf + sizeof(*hdr), len); break; + case MGMT_OP_GET_CONNECTIONS: + err = get_connections(sk, buf + sizeof(*hdr), len); + break; default: BT_DBG("Unknown op %u", opcode); err = cmd_status(sk, opcode, 0x01); -- cgit v1.1 From a38528f1117590169c0bf61cbf874e9fd2d5c5c9 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sat, 22 Jan 2011 06:46:43 +0200 Subject: Bluetooth: Create common cmd_complete function for mgmt.c A lot of management code needs to generate command complete events so it makes sense to have a helper function for this. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. Padovan --- net/bluetooth/mgmt.c | 227 +++++++++++++++------------------------------------ 1 file changed, 67 insertions(+), 160 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 8f4f47e..005288b 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -69,29 +69,26 @@ static int cmd_status(struct sock *sk, u16 cmd, u8 status) return 0; } -static int read_version(struct sock *sk) +static int cmd_complete(struct sock *sk, u16 cmd, void *rp, size_t rp_len) { struct sk_buff *skb; struct mgmt_hdr *hdr; struct mgmt_ev_cmd_complete *ev; - struct mgmt_rp_read_version *rp; BT_DBG("sock %p", sk); - skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC); + skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + rp_len, GFP_ATOMIC); if (!skb) return -ENOMEM; hdr = (void *) skb_put(skb, sizeof(*hdr)); - hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); - hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(*rp)); - ev = (void *) skb_put(skb, sizeof(*ev)); - put_unaligned_le16(MGMT_OP_READ_VERSION, &ev->opcode); + hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); + hdr->len = cpu_to_le16(sizeof(*ev) + rp_len); - rp = (void *) skb_put(skb, sizeof(*rp)); - rp->version = MGMT_VERSION; - put_unaligned_le16(MGMT_REVISION, 
&rp->revision); + ev = (void *) skb_put(skb, sizeof(*ev) + rp_len); + put_unaligned_le16(cmd, &ev->opcode); + memcpy(ev->data, rp, rp_len); if (sock_queue_rcv_skb(sk, skb) < 0) kfree_skb(skb); @@ -99,16 +96,25 @@ static int read_version(struct sock *sk) return 0; } +static int read_version(struct sock *sk) +{ + struct mgmt_rp_read_version rp; + + BT_DBG("sock %p", sk); + + rp.version = MGMT_VERSION; + put_unaligned_le16(MGMT_REVISION, &rp.revision); + + return cmd_complete(sk, MGMT_OP_READ_VERSION, &rp, sizeof(rp)); +} + static int read_index_list(struct sock *sk) { - struct sk_buff *skb; - struct mgmt_hdr *hdr; - struct mgmt_ev_cmd_complete *ev; struct mgmt_rp_read_index_list *rp; struct list_head *p; - size_t body_len; + size_t rp_len; u16 count; - int i; + int i, err; BT_DBG("sock %p", sk); @@ -119,21 +125,13 @@ static int read_index_list(struct sock *sk) count++; } - body_len = sizeof(*ev) + sizeof(*rp) + (2 * count); - skb = alloc_skb(sizeof(*hdr) + body_len, GFP_ATOMIC); - if (!skb) { + rp_len = sizeof(*rp) + (2 * count); + rp = kmalloc(rp_len, GFP_ATOMIC); + if (!rp) { read_unlock(&hci_dev_list_lock); return -ENOMEM; } - hdr = (void *) skb_put(skb, sizeof(*hdr)); - hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); - hdr->len = cpu_to_le16(body_len); - - ev = (void *) skb_put(skb, sizeof(*ev)); - put_unaligned_le16(MGMT_OP_READ_INDEX_LIST, &ev->opcode); - - rp = (void *) skb_put(skb, sizeof(*rp) + (2 * count)); put_unaligned_le16(count, &rp->num_controllers); i = 0; @@ -153,19 +151,17 @@ static int read_index_list(struct sock *sk) read_unlock(&hci_dev_list_lock); - if (sock_queue_rcv_skb(sk, skb) < 0) - kfree_skb(skb); + err = cmd_complete(sk, MGMT_OP_READ_INDEX_LIST, rp, rp_len); - return 0; + kfree(rp); + + return err; } static int read_controller_info(struct sock *sk, unsigned char *data, u16 len) { - struct sk_buff *skb; - struct mgmt_hdr *hdr; - struct mgmt_ev_cmd_complete *ev; - struct mgmt_rp_read_info *rp; - struct mgmt_cp_read_info *cp; + struct 
mgmt_rp_read_info rp; + struct mgmt_cp_read_info *cp = (void *) data; struct hci_dev *hdev; u16 dev_id; @@ -174,29 +170,13 @@ static int read_controller_info(struct sock *sk, unsigned char *data, u16 len) if (len != 2) return cmd_status(sk, MGMT_OP_READ_INFO, EINVAL); - skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC); - if (!skb) - return -ENOMEM; - - hdr = (void *) skb_put(skb, sizeof(*hdr)); - hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); - hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(*rp)); - - ev = (void *) skb_put(skb, sizeof(*ev)); - put_unaligned_le16(MGMT_OP_READ_INFO, &ev->opcode); - - rp = (void *) skb_put(skb, sizeof(*rp)); - - cp = (void *) data; dev_id = get_unaligned_le16(&cp->index); BT_DBG("request for hci%u", dev_id); hdev = hci_dev_get(dev_id); - if (!hdev) { - kfree_skb(skb); + if (!hdev) return cmd_status(sk, MGMT_OP_READ_INFO, ENODEV); - } hci_del_off_timer(hdev); @@ -204,35 +184,32 @@ static int read_controller_info(struct sock *sk, unsigned char *data, u16 len) set_bit(HCI_MGMT, &hdev->flags); - put_unaligned_le16(hdev->id, &rp->index); - rp->type = hdev->dev_type; + put_unaligned_le16(hdev->id, &rp.index); + rp.type = hdev->dev_type; - rp->powered = test_bit(HCI_UP, &hdev->flags); - rp->connectable = test_bit(HCI_PSCAN, &hdev->flags); - rp->discoverable = test_bit(HCI_ISCAN, &hdev->flags); - rp->pairable = test_bit(HCI_PSCAN, &hdev->flags); + rp.powered = test_bit(HCI_UP, &hdev->flags); + rp.connectable = test_bit(HCI_PSCAN, &hdev->flags); + rp.discoverable = test_bit(HCI_ISCAN, &hdev->flags); + rp.pairable = test_bit(HCI_PSCAN, &hdev->flags); if (test_bit(HCI_AUTH, &hdev->flags)) - rp->sec_mode = 3; + rp.sec_mode = 3; else if (hdev->ssp_mode > 0) - rp->sec_mode = 4; + rp.sec_mode = 4; else - rp->sec_mode = 2; + rp.sec_mode = 2; - bacpy(&rp->bdaddr, &hdev->bdaddr); - memcpy(rp->features, hdev->features, 8); - memcpy(rp->dev_class, hdev->dev_class, 3); - put_unaligned_le16(hdev->manufacturer, &rp->manufacturer); - 
rp->hci_ver = hdev->hci_ver; - put_unaligned_le16(hdev->hci_rev, &rp->hci_rev); + bacpy(&rp.bdaddr, &hdev->bdaddr); + memcpy(rp.features, hdev->features, 8); + memcpy(rp.dev_class, hdev->dev_class, 3); + put_unaligned_le16(hdev->manufacturer, &rp.manufacturer); + rp.hci_ver = hdev->hci_ver; + put_unaligned_le16(hdev->hci_rev, &rp.hci_rev); hci_dev_unlock_bh(hdev); hci_dev_put(hdev); - if (sock_queue_rcv_skb(sk, skb) < 0) - kfree_skb(skb); - - return 0; + return cmd_complete(sk, MGMT_OP_READ_INFO, &rp, sizeof(rp)); } static void mgmt_pending_free(struct pending_cmd *cmd) @@ -506,30 +483,12 @@ static int mgmt_event(u16 event, void *data, u16 data_len, struct sock *skip_sk) static int send_mode_rsp(struct sock *sk, u16 opcode, u16 index, u8 val) { - struct mgmt_hdr *hdr; - struct mgmt_ev_cmd_complete *ev; - struct mgmt_mode *rp; - struct sk_buff *skb; + struct mgmt_mode rp; - skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC); - if (!skb) - return -ENOMEM; + put_unaligned_le16(index, &rp.index); + rp.val = val; - hdr = (void *) skb_put(skb, sizeof(*hdr)); - hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); - hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(*rp)); - - ev = (void *) skb_put(skb, sizeof(*ev)); - put_unaligned_le16(opcode, &ev->opcode); - - rp = (void *) skb_put(skb, sizeof(*rp)); - put_unaligned_le16(index, &rp->index); - rp->val = val; - - if (sock_queue_rcv_skb(sk, skb) < 0) - kfree_skb(skb); - - return 0; + return cmd_complete(sk, opcode, &rp, sizeof(rp)); } static int set_pairable(struct sock *sk, unsigned char *data, u16 len) @@ -571,31 +530,6 @@ failed: return err; } -static int index_rsp(struct sock *sk, u16 opcode, u16 index) -{ - struct mgmt_hdr *hdr; - struct mgmt_ev_cmd_complete *ev; - struct sk_buff *skb; - - skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(index), GFP_ATOMIC); - if (!skb) - return -ENOMEM; - - hdr = (void *) skb_put(skb, sizeof(*hdr)); - hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); - hdr->len = 
cpu_to_le16(sizeof(*ev) + sizeof(index)); - - ev = (void *) skb_put(skb, sizeof(*ev)); - put_unaligned_le16(opcode, &ev->opcode); - - put_unaligned_le16(index, skb_put(skb, sizeof(index))); - - if (sock_queue_rcv_skb(sk, skb) < 0) - kfree_skb(skb); - - return 0; -} - static u8 get_service_classes(struct hci_dev *hdev) { struct list_head *p; @@ -663,7 +597,7 @@ static int add_uuid(struct sock *sk, unsigned char *data, u16 len) if (err < 0) goto failed; - err = index_rsp(sk, MGMT_OP_ADD_UUID, dev_id); + err = cmd_complete(sk, MGMT_OP_ADD_UUID, &dev_id, sizeof(dev_id)); failed: hci_dev_unlock_bh(hdev); @@ -718,7 +652,7 @@ static int remove_uuid(struct sock *sk, unsigned char *data, u16 len) if (err < 0) goto unlock; - err = index_rsp(sk, MGMT_OP_REMOVE_UUID, dev_id); + err = cmd_complete(sk, MGMT_OP_REMOVE_UUID, &dev_id, sizeof(dev_id)); unlock: hci_dev_unlock_bh(hdev); @@ -751,7 +685,8 @@ static int set_dev_class(struct sock *sk, unsigned char *data, u16 len) err = update_class(hdev); if (err == 0) - err = index_rsp(sk, MGMT_OP_SET_DEV_CLASS, dev_id); + err = cmd_complete(sk, MGMT_OP_SET_DEV_CLASS, &dev_id, + sizeof(dev_id)); hci_dev_unlock_bh(hdev); hci_dev_put(hdev); @@ -786,7 +721,8 @@ static int set_service_cache(struct sock *sk, unsigned char *data, u16 len) } if (err == 0) - err = index_rsp(sk, MGMT_OP_SET_SERVICE_CACHE, dev_id); + err = cmd_complete(sk, MGMT_OP_SET_SERVICE_CACHE, &dev_id, + sizeof(dev_id)); hci_dev_unlock_bh(hdev); hci_dev_put(hdev); @@ -943,14 +879,11 @@ failed: static int get_connections(struct sock *sk, unsigned char *data, u16 len) { - struct sk_buff *skb; - struct mgmt_hdr *hdr; struct mgmt_cp_get_connections *cp; - struct mgmt_ev_cmd_complete *ev; struct mgmt_rp_get_connections *rp; struct hci_dev *hdev; struct list_head *p; - size_t body_len; + size_t rp_len; u16 dev_id, count; int i, err; @@ -970,21 +903,13 @@ static int get_connections(struct sock *sk, unsigned char *data, u16 len) count++; } - body_len = sizeof(*ev) + sizeof(*rp) + 
(count * sizeof(bdaddr_t)); - skb = alloc_skb(sizeof(*hdr) + body_len, GFP_ATOMIC); - if (!skb) { + rp_len = sizeof(*rp) + (count * sizeof(bdaddr_t)); + rp = kmalloc(rp_len, GFP_ATOMIC); + if (!rp) { err = -ENOMEM; goto unlock; } - hdr = (void *) skb_put(skb, sizeof(*hdr)); - hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); - hdr->len = cpu_to_le16(body_len); - - ev = (void *) skb_put(skb, sizeof(*ev)); - put_unaligned_le16(MGMT_OP_GET_CONNECTIONS, &ev->opcode); - - rp = (void *) skb_put(skb, sizeof(*rp) + (count * sizeof(bdaddr_t))); put_unaligned_le16(dev_id, &rp->index); put_unaligned_le16(count, &rp->conn_count); @@ -999,12 +924,10 @@ static int get_connections(struct sock *sk, unsigned char *data, u16 len) read_unlock(&hci_dev_list_lock); - if (sock_queue_rcv_skb(sk, skb) < 0) - kfree_skb(skb); - - err = 0; + err = cmd_complete(sk, MGMT_OP_GET_CONNECTIONS, rp, rp_len); unlock: + kfree(rp); hci_dev_unlock_bh(hdev); hci_dev_put(hdev); return err; @@ -1234,28 +1157,12 @@ static void disconnect_rsp(struct pending_cmd *cmd, void *data) { struct mgmt_cp_disconnect *cp = cmd->cmd; struct sock **sk = data; - struct sk_buff *skb; - struct mgmt_hdr *hdr; - struct mgmt_ev_cmd_complete *ev; - struct mgmt_rp_disconnect *rp; + struct mgmt_rp_disconnect rp; - skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC); - if (!skb) - return; + put_unaligned_le16(cmd->index, &rp.index); + bacpy(&rp.bdaddr, &cp->bdaddr); - hdr = (void *) skb_put(skb, sizeof(*hdr)); - hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); - hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(*rp)); - - ev = (void *) skb_put(skb, sizeof(*ev)); - put_unaligned_le16(MGMT_OP_DISCONNECT, &ev->opcode); - - rp = (void *) skb_put(skb, sizeof(*rp)); - put_unaligned_le16(cmd->index, &rp->index); - bacpy(&rp->bdaddr, &cp->bdaddr); - - if (sock_queue_rcv_skb(cmd->sk, skb) < 0) - kfree_skb(skb); + cmd_complete(cmd->sk, MGMT_OP_DISCONNECT, &rp, sizeof(rp)); *sk = cmd->sk; sock_hold(*sk); -- cgit v1.1 From 
980e1a537fed7dfa53e9a4b6e586b43341f8c2d5 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sat, 22 Jan 2011 06:10:07 +0200 Subject: Bluetooth: Add support for PIN code handling in the management interface This patch adds the necessary commands and events needed to communicate PIN code related actions between the kernel and userspace. This includes a pin_code_request event as well as pin_code_reply and pin_code_negative_reply commands. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_event.c | 46 +++++++++++++++ net/bluetooth/mgmt.c | 141 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 187 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 995ae6c..98bcf78 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -743,6 +743,40 @@ static void hci_cc_set_event_flt(struct hci_dev *hdev, struct sk_buff *skb) hci_req_complete(hdev, HCI_OP_SET_EVENT_FLT, status); } +static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_pin_code_reply *rp = (void *) skb->data; + struct hci_cp_pin_code_reply *cp; + struct hci_conn *conn; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (test_bit(HCI_MGMT, &hdev->flags)) + mgmt_pin_code_reply_complete(hdev->id, &rp->bdaddr, rp->status); + + if (rp->status != 0) + return; + + cp = hci_sent_cmd_data(hdev, HCI_OP_PIN_CODE_REPLY); + if (!cp) + return; + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr); + if (conn) + conn->pin_length = cp->pin_len; +} + +static void hci_cc_pin_code_neg_reply(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_pin_code_neg_reply *rp = (void *) skb->data; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (test_bit(HCI_MGMT, &hdev->flags)) + mgmt_pin_code_neg_reply_complete(hdev->id, &rp->bdaddr, + rp->status); +} + static inline void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) { BT_DBG("%s status 
0x%x", hdev->name, status); @@ -1619,6 +1653,14 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk hci_cc_set_event_flt(hdev, skb); break; + case HCI_OP_PIN_CODE_REPLY: + hci_cc_pin_code_reply(hdev, skb); + break; + + case HCI_OP_PIN_CODE_NEG_REPLY: + hci_cc_pin_code_neg_reply(hdev, skb); + break; + default: BT_DBG("%s opcode 0x%x", hdev->name, opcode); break; @@ -1821,6 +1863,9 @@ static inline void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY, sizeof(ev->bdaddr), &ev->bdaddr); + if (test_bit(HCI_MGMT, &hdev->flags)) + mgmt_pin_code_request(hdev->id, &ev->bdaddr); + hci_dev_unlock(hdev); } @@ -1889,6 +1934,7 @@ static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff if (conn) { hci_conn_hold(conn); conn->disc_timeout = HCI_DISCONN_TIMEOUT; + pin_len = conn->pin_length; hci_conn_put(conn); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 005288b..3800aaf 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -933,6 +933,89 @@ unlock: return err; } +static int pin_code_reply(struct sock *sk, unsigned char *data, u16 len) +{ + struct hci_dev *hdev; + struct mgmt_cp_pin_code_reply *cp; + struct hci_cp_pin_code_reply reply; + u16 dev_id; + int err; + + BT_DBG(""); + + cp = (void *) data; + dev_id = get_unaligned_le16(&cp->index); + + hdev = hci_dev_get(dev_id); + if (!hdev) + return cmd_status(sk, MGMT_OP_DISCONNECT, ENODEV); + + hci_dev_lock_bh(hdev); + + if (!test_bit(HCI_UP, &hdev->flags)) { + err = cmd_status(sk, MGMT_OP_PIN_CODE_REPLY, ENETDOWN); + goto failed; + } + + err = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_REPLY, dev_id, data, len); + if (err < 0) + goto failed; + + bacpy(&reply.bdaddr, &cp->bdaddr); + reply.pin_len = cp->pin_len; + memcpy(reply.pin_code, cp->pin_code, 16); + + err = hci_send_cmd(hdev, HCI_OP_PIN_CODE_REPLY, sizeof(reply), &reply); + if (err < 0) + mgmt_pending_remove(MGMT_OP_PIN_CODE_REPLY, dev_id); 
+ +failed: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; +} + +static int pin_code_neg_reply(struct sock *sk, unsigned char *data, u16 len) +{ + struct hci_dev *hdev; + struct mgmt_cp_pin_code_neg_reply *cp; + u16 dev_id; + int err; + + BT_DBG(""); + + cp = (void *) data; + dev_id = get_unaligned_le16(&cp->index); + + hdev = hci_dev_get(dev_id); + if (!hdev) + return cmd_status(sk, MGMT_OP_PIN_CODE_NEG_REPLY, ENODEV); + + hci_dev_lock_bh(hdev); + + if (!test_bit(HCI_UP, &hdev->flags)) { + err = cmd_status(sk, MGMT_OP_PIN_CODE_NEG_REPLY, ENETDOWN); + goto failed; + } + + err = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_NEG_REPLY, dev_id, + data, len); + if (err < 0) + goto failed; + + err = hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY, sizeof(bdaddr_t), + &cp->bdaddr); + if (err < 0) + mgmt_pending_remove(MGMT_OP_PIN_CODE_NEG_REPLY, dev_id); + +failed: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; +} + int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) { unsigned char *buf; @@ -1009,6 +1092,12 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) case MGMT_OP_GET_CONNECTIONS: err = get_connections(sk, buf + sizeof(*hdr), len); break; + case MGMT_OP_PIN_CODE_REPLY: + err = pin_code_reply(sk, buf + sizeof(*hdr), len); + break; + case MGMT_OP_PIN_CODE_NEG_REPLY: + err = pin_code_neg_reply(sk, buf + sizeof(*hdr), len); + break; default: BT_DBG("Unknown op %u", opcode); err = cmd_status(sk, opcode, 0x01); @@ -1217,3 +1306,55 @@ int mgmt_connect_failed(u16 index, bdaddr_t *bdaddr, u8 status) return mgmt_event(MGMT_EV_CONNECT_FAILED, &ev, sizeof(ev), NULL); } + +int mgmt_pin_code_request(u16 index, bdaddr_t *bdaddr) +{ + struct mgmt_ev_pin_code_request ev; + + put_unaligned_le16(index, &ev.index); + bacpy(&ev.bdaddr, bdaddr); + + return mgmt_event(MGMT_EV_PIN_CODE_REQUEST, &ev, sizeof(ev), NULL); +} + +int mgmt_pin_code_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) +{ + struct pending_cmd *cmd; + 
int err; + + cmd = mgmt_pending_find(MGMT_OP_PIN_CODE_REPLY, index); + if (!cmd) + return -ENOENT; + + if (status != 0) + err = cmd_status(cmd->sk, MGMT_OP_PIN_CODE_REPLY, status); + else + err = cmd_complete(cmd->sk, MGMT_OP_PIN_CODE_REPLY, + bdaddr, sizeof(*bdaddr)); + + list_del(&cmd->list); + mgmt_pending_free(cmd); + + return err; +} + +int mgmt_pin_code_neg_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) +{ + struct pending_cmd *cmd; + int err; + + cmd = mgmt_pending_find(MGMT_OP_PIN_CODE_NEG_REPLY, index); + if (!cmd) + return -ENOENT; + + if (status != 0) + err = cmd_status(cmd->sk, MGMT_OP_PIN_CODE_NEG_REPLY, status); + else + err = cmd_complete(cmd->sk, MGMT_OP_PIN_CODE_NEG_REPLY, + bdaddr, sizeof(*bdaddr)); + + list_del(&cmd->list); + mgmt_pending_free(cmd); + + return err; +} -- cgit v1.1 From 17fa4b9dff72fb3a1a68cc80caf98fc941d2b8b3 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 25 Jan 2011 13:28:33 +0200 Subject: Bluetooth: Add set_io_capability management command This patch adds a new set_io_capability management command which is used to set the IO capability for Secure Simple Pairing (SSP) as well as the Security Manager Protocol (SMP). The value is per hci_dev and each hci_conn object inherits it upon creation. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/hci_conn.c | 1 + net/bluetooth/hci_core.c | 1 + net/bluetooth/hci_event.c | 30 ++++++++++++++++++++++++++++-- net/bluetooth/mgmt.c | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 62 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 99cd8d9..42dc39f 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -234,6 +234,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) conn->mode = HCI_CM_ACTIVE; conn->state = BT_OPEN; conn->auth_type = HCI_AT_GENERAL_BONDING; + conn->io_capability = hdev->io_capability; conn->power_save = 1; conn->disc_timeout = HCI_DISCONN_TIMEOUT; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 8ca8cf1..bf6729a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1084,6 +1084,7 @@ int hci_register_dev(struct hci_dev *hdev) hdev->pkt_type = (HCI_DM1 | HCI_DH1 | HCI_HV1); hdev->esco_type = (ESCO_HV1); hdev->link_mode = (HCI_LM_ACCEPT); + hdev->io_capability = 0x03; /* No Input No Output */ hdev->idle_timeout = 0; hdev->sniff_max_interval = 800; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 98bcf78..617f583 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2198,6 +2198,25 @@ static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct hci_dev_unlock(hdev); } +static inline u8 hci_get_auth_req(struct hci_conn *conn) +{ + /* If remote requests dedicated bonding follow that lead */ + if (conn->remote_auth == 0x02 || conn->remote_auth == 0x03) { + /* If both remote and local IO capabilities allow MITM + * protection then require it, otherwise don't */ + if (conn->remote_cap == 0x03 || conn->io_capability == 0x03) + return 0x02; + else + return 0x03; + } + + /* If remote requests no-bonding follow that lead */ + if (conn->remote_auth == 0x00 || conn->remote_auth == 0x01) + return 0x00; + + return 
conn->auth_type; +} + static inline void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_io_capa_request *ev = (void *) skb->data; @@ -2218,8 +2237,15 @@ static inline void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff if (test_bit(HCI_PAIRABLE, &hdev->flags) || (conn->remote_auth & ~0x01) == HCI_AT_NO_BONDING) { - /* FIXME: Do IO capa response based on information - * provided through the management interface */ + struct hci_cp_io_capability_reply cp; + + bacpy(&cp.bdaddr, &ev->bdaddr); + cp.capability = conn->io_capability; + cp.oob_data = 0; + cp.authentication = hci_get_auth_req(conn); + + hci_send_cmd(hdev, HCI_OP_IO_CAPABILITY_REPLY, + sizeof(cp), &cp); } else { struct hci_cp_io_capability_neg_reply cp; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3800aaf..b2bda830 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1016,6 +1016,35 @@ failed: return err; } +static int set_io_capability(struct sock *sk, unsigned char *data, u16 len) +{ + struct hci_dev *hdev; + struct mgmt_cp_set_io_capability *cp; + u16 dev_id; + + BT_DBG(""); + + cp = (void *) data; + dev_id = get_unaligned_le16(&cp->index); + + hdev = hci_dev_get(dev_id); + if (!hdev) + return cmd_status(sk, MGMT_OP_SET_IO_CAPABILITY, ENODEV); + + hci_dev_lock_bh(hdev); + + hdev->io_capability = cp->io_capability; + + BT_DBG("%s IO capability set to 0x%02x", hdev->name, + hdev->io_capability); + + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return cmd_complete(sk, MGMT_OP_SET_IO_CAPABILITY, + &dev_id, sizeof(dev_id)); +} + int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) { unsigned char *buf; @@ -1098,6 +1127,9 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) case MGMT_OP_PIN_CODE_NEG_REPLY: err = pin_code_neg_reply(sk, buf + sizeof(*hdr), len); break; + case MGMT_OP_SET_IO_CAPABILITY: + err = set_io_capability(sk, buf + sizeof(*hdr), len); + break; default: BT_DBG("Unknown op 
%u", opcode); err = cmd_status(sk, opcode, 0x01); -- cgit v1.1 From 5a08eccedaa1e12b74cf3afea9e11a9aefc29f73 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Tue, 11 Jan 2011 17:20:20 +0200 Subject: Bluetooth: Do not use assignments in IF conditions Fix checkpatch warnings concerning assignments in if conditions. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo F. Padovan --- net/bluetooth/af_bluetooth.c | 6 ++++-- net/bluetooth/cmtp/capi.c | 3 ++- net/bluetooth/cmtp/core.c | 9 ++++++--- net/bluetooth/hci_core.c | 12 +++++++++--- net/bluetooth/hci_event.c | 22 ++++++++++++++-------- net/bluetooth/hidp/core.c | 9 ++++++--- net/bluetooth/l2cap.c | 5 ++++- 7 files changed, 45 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index c4cf3f5..a6732b5 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -240,7 +240,8 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags & (MSG_OOB)) return -EOPNOTSUPP; - if (!(skb = skb_recv_datagram(sk, flags, noblock, &err))) { + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) return 0; return err; @@ -323,7 +324,8 @@ int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (copied >= target) break; - if ((err = sock_error(sk)) != 0) + err = sock_error(sk); + if (err) break; if (sk->sk_shutdown & RCV_SHUTDOWN) break; diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index 3487cfe..67cff810 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -155,7 +155,8 @@ static void cmtp_send_interopmsg(struct cmtp_session *session, BT_DBG("session %p subcmd 0x%02x appl %d msgnum %d", session, subcmd, appl, msgnum); - if (!(skb = alloc_skb(CAPI_MSG_BASELEN + 6 + len, GFP_ATOMIC))) { + skb = alloc_skb(CAPI_MSG_BASELEN + 6 + len, GFP_ATOMIC); + if (!skb) { BT_ERR("Can't allocate memory for interoperability packet"); 
return; } diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 8e5f292..2cee71a 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -115,7 +115,8 @@ static inline void cmtp_add_msgpart(struct cmtp_session *session, int id, const size = (skb) ? skb->len + count : count; - if (!(nskb = alloc_skb(size, GFP_ATOMIC))) { + nskb = alloc_skb(size, GFP_ATOMIC); + if (!nskb) { BT_ERR("Can't allocate memory for CAPI message"); return; } @@ -216,7 +217,8 @@ static void cmtp_process_transmit(struct cmtp_session *session) BT_DBG("session %p", session); - if (!(nskb = alloc_skb(session->mtu, GFP_ATOMIC))) { + nskb = alloc_skb(session->mtu, GFP_ATOMIC); + if (!nskb) { BT_ERR("Can't allocate memory for new frame"); return; } @@ -224,7 +226,8 @@ static void cmtp_process_transmit(struct cmtp_session *session) while ((skb = skb_dequeue(&session->transmit))) { struct cmtp_scb *scb = (void *) skb->cb; - if ((tail = (session->mtu - nskb->len)) < 5) { + tail = session->mtu - nskb->len; + if (tail < 5) { cmtp_send_frame(session, nskb->data, nskb->len); skb_trim(nskb, 0); tail = session->mtu; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index bf6729a..2f00322 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -429,7 +429,8 @@ int hci_inquiry(void __user *arg) if (copy_from_user(&ir, ptr, sizeof(ir))) return -EFAULT; - if (!(hdev = hci_dev_get(ir.dev_id))) + hdev = hci_dev_get(ir.dev_id); + if (!hdev) return -ENODEV; hci_dev_lock_bh(hdev); @@ -489,7 +490,8 @@ int hci_dev_open(__u16 dev) struct hci_dev *hdev; int ret = 0; - if (!(hdev = hci_dev_get(dev))) + hdev = hci_dev_get(dev); + if (!hdev) return -ENODEV; BT_DBG("%s %p", hdev->name, hdev); @@ -1940,7 +1942,11 @@ static void hci_cmd_task(unsigned long arg) } /* Send queued commands */ - if (atomic_read(&hdev->cmd_cnt) && (skb = skb_dequeue(&hdev->cmd_q))) { + if (atomic_read(&hdev->cmd_cnt)) { + skb = skb_dequeue(&hdev->cmd_q); + if (!skb) + return; + 
kfree_skb(hdev->sent_cmd); hdev->sent_cmd = skb_clone(skb, GFP_ATOMIC); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 617f583..cee46cb 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -851,11 +851,14 @@ static void hci_cs_add_sco(struct hci_dev *hdev, __u8 status) hci_dev_lock(hdev); acl = hci_conn_hash_lookup_handle(hdev, handle); - if (acl && (sco = acl->link)) { - sco->state = BT_CLOSED; + if (acl) { + sco = acl->link; + if (sco) { + sco->state = BT_CLOSED; - hci_proto_connect_cfm(sco, status); - hci_conn_del(sco); + hci_proto_connect_cfm(sco, status); + hci_conn_del(sco); + } } hci_dev_unlock(hdev); @@ -1037,11 +1040,14 @@ static void hci_cs_setup_sync_conn(struct hci_dev *hdev, __u8 status) hci_dev_lock(hdev); acl = hci_conn_hash_lookup_handle(hdev, handle); - if (acl && (sco = acl->link)) { - sco->state = BT_CLOSED; + if (acl) { + sco = acl->link; + if (sco) { + sco->state = BT_CLOSED; - hci_proto_connect_cfm(sco, status); - hci_conn_del(sco); + hci_proto_connect_cfm(sco, status); + hci_conn_del(sco); + } } hci_dev_unlock(hdev); diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 29544c2..e0de929 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -157,7 +157,8 @@ static int hidp_queue_event(struct hidp_session *session, struct input_dev *dev, session->leds = newleds; - if (!(skb = alloc_skb(3, GFP_ATOMIC))) { + skb = alloc_skb(3, GFP_ATOMIC); + if (!skb) { BT_ERR("Can't allocate memory for new frame"); return -ENOMEM; } @@ -250,7 +251,8 @@ static int __hidp_send_ctrl_message(struct hidp_session *session, BT_DBG("session %p data %p size %d", session, data, size); - if (!(skb = alloc_skb(size + 1, GFP_ATOMIC))) { + skb = alloc_skb(size + 1, GFP_ATOMIC); + if (!skb) { BT_ERR("Can't allocate memory for new frame"); return -ENOMEM; } @@ -283,7 +285,8 @@ static int hidp_queue_report(struct hidp_session *session, BT_DBG("session %p hid %p data %p size %d", session, 
session->hid, data, size); - if (!(skb = alloc_skb(size + 1, GFP_ATOMIC))) { + skb = alloc_skb(size + 1, GFP_ATOMIC); + if (!skb) { BT_ERR("Can't allocate memory for new frame"); return -ENOMEM; } diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index cbaa740..28d2954 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -4724,7 +4724,10 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl { struct l2cap_conn *conn = hcon->l2cap_data; - if (!conn && !(conn = l2cap_conn_add(hcon, 0))) + if (!conn) + conn = l2cap_conn_add(hcon, 0); + + if (!conn) goto drop; BT_DBG("conn %p len %d flags 0x%x", conn, skb->len, flags); -- cgit v1.1 From d37f50e19094862a5d60d79637d6f4dbdc42f4f1 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Mon, 24 Jan 2011 10:53:24 +0200 Subject: Bluetooth: fix crash by disabling tasklet in sock accept Crash can happen when tasklet handling connect/disconnect requests preempts socket accept. Can be reproduced with "l2test -r" on one side and several "l2test -c -b 1000 -i hci0 -P 10 " on the other side. Disable tasklets in socket accept and change lock_sock and release_sock to bh_lock_sock and bh_unlock_sock since we have to use spinlocks and there is no need to mark sock as owned by user. ... [ 3555.897247] Unable to handle kernel NULL pointer dereference at virtual address 000000bc [ 3555.915039] pgd = cab9c000 [ 3555.917785] [000000bc] *pgd=8bf3d031, *pte=00000000, *ppte=00000000 [ 3555.928314] Internal error: Oops: 17 [#1] PREEMPT [ 3555.999786] CPU: 0 Not tainted (2.6.32.21-13874-g67918ef #65) ... [ 3556.005981] PC is at bt_accept_unlink+0x20/0x58 [bluetooth] [ 3556.011627] LR is at bt_accept_dequeue+0x3c/0xe8 [bluetooth] ...
[ 3556.161285] [] (bt_accept_unlink+0x20/0x58 [bluetooth]) from [] (bt_accept_dequeue+0x3c/0xe8 [bluetooth]) [ 3556.172729] [] (bt_accept_dequeue+0x3c/0xe8 [bluetooth]) from [] (l2cap_sock_accept+0x100/0x15c [l2cap]) [ 3556.184082] [] (l2cap_sock_accept+0x100/0x15c [l2cap]) from [] (sys_accept4+0x120/0x1e0) [ 3556.193969] [] (sys_accept4+0x120/0x1e0) from [] (ret_fast_syscall+0x0/0x2c) [ 3556.202819] Code: e5813000 e5901164 e580c160 e580c15c (e1d13bbc) ... Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo F. Padovan --- net/bluetooth/af_bluetooth.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index a6732b5..2abfe2f 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -199,14 +199,15 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock) BT_DBG("parent %p", parent); + local_bh_disable(); list_for_each_safe(p, n, &bt_sk(parent)->accept_q) { sk = (struct sock *) list_entry(p, struct bt_sock, accept_q); - lock_sock(sk); + bh_lock_sock(sk); /* FIXME: Is this check still needed */ if (sk->sk_state == BT_CLOSED) { - release_sock(sk); + bh_unlock_sock(sk); bt_accept_unlink(sk); continue; } @@ -216,12 +217,16 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock) bt_accept_unlink(sk); if (newsock) sock_graft(sk, newsock); - release_sock(sk); + + bh_unlock_sock(sk); + local_bh_enable(); return sk; } - release_sock(sk); + bh_unlock_sock(sk); } + local_bh_enable(); + return NULL; } EXPORT_SYMBOL(bt_accept_dequeue); -- cgit v1.1 From 0a708f8fc46fde3be2116b8d79f7469a24097c90 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Thu, 3 Feb 2011 18:58:10 -0200 Subject: Bluetooth: Rename l2cap.c to l2cap_core.c In preparation for the L2CAP code split into many files. Signed-off-by: Gustavo F.
Padovan --- net/bluetooth/Makefile | 1 + net/bluetooth/l2cap.c | 4984 -------------------------------------------- net/bluetooth/l2cap_core.c | 4984 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 4985 insertions(+), 4984 deletions(-) delete mode 100644 net/bluetooth/l2cap.c create mode 100644 net/bluetooth/l2cap_core.c (limited to 'net') diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index 250f954..bf2945e 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -11,3 +11,4 @@ obj-$(CONFIG_BT_CMTP) += cmtp/ obj-$(CONFIG_BT_HIDP) += hidp/ bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o hci_sock.o hci_sysfs.o lib.o +l2cap-y := l2cap_core.o diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c deleted file mode 100644 index 28d2954..0000000 --- a/net/bluetooth/l2cap.c +++ /dev/null @@ -1,4984 +0,0 @@ -/* - BlueZ - Bluetooth protocol stack for Linux - Copyright (C) 2000-2001 Qualcomm Incorporated - Copyright (C) 2009-2010 Gustavo F. Padovan - Copyright (C) 2010 Google Inc. - - Written 2000,2001 by Maxim Krasnyansky - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License version 2 as - published by the Free Software Foundation; - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. - IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY - CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
- - ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, - COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS - SOFTWARE IS DISCLAIMED. -*/ - -/* Bluetooth L2CAP core and sockets. */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#define VERSION "2.15" - -static int disable_ertm; - -static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN; -static u8 l2cap_fixed_chan[8] = { 0x02, }; - -static const struct proto_ops l2cap_sock_ops; - -static struct workqueue_struct *_busy_wq; - -static struct bt_sock_list l2cap_sk_list = { - .lock = __RW_LOCK_UNLOCKED(l2cap_sk_list.lock) -}; - -static void l2cap_busy_work(struct work_struct *work); - -static void __l2cap_sock_close(struct sock *sk, int reason); -static void l2cap_sock_close(struct sock *sk); -static void l2cap_sock_kill(struct sock *sk); - -static int l2cap_build_conf_req(struct sock *sk, void *data); -static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, - u8 code, u8 ident, u16 dlen, void *data); - -static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb); - -/* ---- L2CAP timers ---- */ -static void l2cap_sock_set_timer(struct sock *sk, long timeout) -{ - BT_DBG("sk %p state %d timeout %ld", sk, sk->sk_state, timeout); - sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout); -} - -static void l2cap_sock_clear_timer(struct sock *sk) -{ - BT_DBG("sock %p state %d", sk, sk->sk_state); - sk_stop_timer(sk, &sk->sk_timer); -} - -static void l2cap_sock_timeout(unsigned long arg) -{ - struct sock *sk = (struct sock *) arg; - int reason; - - BT_DBG("sock %p state %d", sk, sk->sk_state); - - bh_lock_sock(sk); - - if (sock_owned_by_user(sk)) { - /* sk is owned by user. 
Try again later */ - l2cap_sock_set_timer(sk, HZ / 5); - bh_unlock_sock(sk); - sock_put(sk); - return; - } - - if (sk->sk_state == BT_CONNECTED || sk->sk_state == BT_CONFIG) - reason = ECONNREFUSED; - else if (sk->sk_state == BT_CONNECT && - l2cap_pi(sk)->sec_level != BT_SECURITY_SDP) - reason = ECONNREFUSED; - else - reason = ETIMEDOUT; - - __l2cap_sock_close(sk, reason); - - bh_unlock_sock(sk); - - l2cap_sock_kill(sk); - sock_put(sk); -} - -/* ---- L2CAP channels ---- */ -static struct sock *__l2cap_get_chan_by_dcid(struct l2cap_chan_list *l, u16 cid) -{ - struct sock *s; - for (s = l->head; s; s = l2cap_pi(s)->next_c) { - if (l2cap_pi(s)->dcid == cid) - break; - } - return s; -} - -static struct sock *__l2cap_get_chan_by_scid(struct l2cap_chan_list *l, u16 cid) -{ - struct sock *s; - for (s = l->head; s; s = l2cap_pi(s)->next_c) { - if (l2cap_pi(s)->scid == cid) - break; - } - return s; -} - -/* Find channel with given SCID. - * Returns locked socket */ -static inline struct sock *l2cap_get_chan_by_scid(struct l2cap_chan_list *l, u16 cid) -{ - struct sock *s; - read_lock(&l->lock); - s = __l2cap_get_chan_by_scid(l, cid); - if (s) - bh_lock_sock(s); - read_unlock(&l->lock); - return s; -} - -static struct sock *__l2cap_get_chan_by_ident(struct l2cap_chan_list *l, u8 ident) -{ - struct sock *s; - for (s = l->head; s; s = l2cap_pi(s)->next_c) { - if (l2cap_pi(s)->ident == ident) - break; - } - return s; -} - -static inline struct sock *l2cap_get_chan_by_ident(struct l2cap_chan_list *l, u8 ident) -{ - struct sock *s; - read_lock(&l->lock); - s = __l2cap_get_chan_by_ident(l, ident); - if (s) - bh_lock_sock(s); - read_unlock(&l->lock); - return s; -} - -static u16 l2cap_alloc_cid(struct l2cap_chan_list *l) -{ - u16 cid = L2CAP_CID_DYN_START; - - for (; cid < L2CAP_CID_DYN_END; cid++) { - if (!__l2cap_get_chan_by_scid(l, cid)) - return cid; - } - - return 0; -} - -static inline void __l2cap_chan_link(struct l2cap_chan_list *l, struct sock *sk) -{ - sock_hold(sk); - - 
if (l->head) - l2cap_pi(l->head)->prev_c = sk; - - l2cap_pi(sk)->next_c = l->head; - l2cap_pi(sk)->prev_c = NULL; - l->head = sk; -} - -static inline void l2cap_chan_unlink(struct l2cap_chan_list *l, struct sock *sk) -{ - struct sock *next = l2cap_pi(sk)->next_c, *prev = l2cap_pi(sk)->prev_c; - - write_lock_bh(&l->lock); - if (sk == l->head) - l->head = next; - - if (next) - l2cap_pi(next)->prev_c = prev; - if (prev) - l2cap_pi(prev)->next_c = next; - write_unlock_bh(&l->lock); - - __sock_put(sk); -} - -static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct sock *parent) -{ - struct l2cap_chan_list *l = &conn->chan_list; - - BT_DBG("conn %p, psm 0x%2.2x, dcid 0x%4.4x", conn, - l2cap_pi(sk)->psm, l2cap_pi(sk)->dcid); - - conn->disc_reason = 0x13; - - l2cap_pi(sk)->conn = conn; - - if (sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM) { - /* Alloc CID for connection-oriented socket */ - l2cap_pi(sk)->scid = l2cap_alloc_cid(l); - } else if (sk->sk_type == SOCK_DGRAM) { - /* Connectionless socket */ - l2cap_pi(sk)->scid = L2CAP_CID_CONN_LESS; - l2cap_pi(sk)->dcid = L2CAP_CID_CONN_LESS; - l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU; - } else { - /* Raw socket can send/recv signalling messages only */ - l2cap_pi(sk)->scid = L2CAP_CID_SIGNALING; - l2cap_pi(sk)->dcid = L2CAP_CID_SIGNALING; - l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU; - } - - __l2cap_chan_link(l, sk); - - if (parent) - bt_accept_enqueue(parent, sk); -} - -/* Delete channel. - * Must be called on the locked socket. 
*/ -static void l2cap_chan_del(struct sock *sk, int err) -{ - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - struct sock *parent = bt_sk(sk)->parent; - - l2cap_sock_clear_timer(sk); - - BT_DBG("sk %p, conn %p, err %d", sk, conn, err); - - if (conn) { - /* Unlink from channel list */ - l2cap_chan_unlink(&conn->chan_list, sk); - l2cap_pi(sk)->conn = NULL; - hci_conn_put(conn->hcon); - } - - sk->sk_state = BT_CLOSED; - sock_set_flag(sk, SOCK_ZAPPED); - - if (err) - sk->sk_err = err; - - if (parent) { - bt_accept_unlink(sk); - parent->sk_data_ready(parent, 0); - } else - sk->sk_state_change(sk); - - skb_queue_purge(TX_QUEUE(sk)); - - if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) { - struct srej_list *l, *tmp; - - del_timer(&l2cap_pi(sk)->retrans_timer); - del_timer(&l2cap_pi(sk)->monitor_timer); - del_timer(&l2cap_pi(sk)->ack_timer); - - skb_queue_purge(SREJ_QUEUE(sk)); - skb_queue_purge(BUSY_QUEUE(sk)); - - list_for_each_entry_safe(l, tmp, SREJ_LIST(sk), list) { - list_del(&l->list); - kfree(l); - } - } -} - -static inline u8 l2cap_get_auth_type(struct sock *sk) -{ - if (sk->sk_type == SOCK_RAW) { - switch (l2cap_pi(sk)->sec_level) { - case BT_SECURITY_HIGH: - return HCI_AT_DEDICATED_BONDING_MITM; - case BT_SECURITY_MEDIUM: - return HCI_AT_DEDICATED_BONDING; - default: - return HCI_AT_NO_BONDING; - } - } else if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) { - if (l2cap_pi(sk)->sec_level == BT_SECURITY_LOW) - l2cap_pi(sk)->sec_level = BT_SECURITY_SDP; - - if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH) - return HCI_AT_NO_BONDING_MITM; - else - return HCI_AT_NO_BONDING; - } else { - switch (l2cap_pi(sk)->sec_level) { - case BT_SECURITY_HIGH: - return HCI_AT_GENERAL_BONDING_MITM; - case BT_SECURITY_MEDIUM: - return HCI_AT_GENERAL_BONDING; - default: - return HCI_AT_NO_BONDING; - } - } -} - -/* Service level security */ -static inline int l2cap_check_security(struct sock *sk) -{ - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - __u8 auth_type; - - auth_type = 
l2cap_get_auth_type(sk); - - return hci_conn_security(conn->hcon, l2cap_pi(sk)->sec_level, - auth_type); -} - -static inline u8 l2cap_get_ident(struct l2cap_conn *conn) -{ - u8 id; - - /* Get next available identificator. - * 1 - 128 are used by kernel. - * 129 - 199 are reserved. - * 200 - 254 are used by utilities like l2ping, etc. - */ - - spin_lock_bh(&conn->lock); - - if (++conn->tx_ident > 128) - conn->tx_ident = 1; - - id = conn->tx_ident; - - spin_unlock_bh(&conn->lock); - - return id; -} - -static inline void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data) -{ - struct sk_buff *skb = l2cap_build_cmd(conn, code, ident, len, data); - u8 flags; - - BT_DBG("code 0x%2.2x", code); - - if (!skb) - return; - - if (lmp_no_flush_capable(conn->hcon->hdev)) - flags = ACL_START_NO_FLUSH; - else - flags = ACL_START; - - hci_send_acl(conn->hcon, skb, flags); -} - -static inline void l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control) -{ - struct sk_buff *skb; - struct l2cap_hdr *lh; - struct l2cap_conn *conn = pi->conn; - struct sock *sk = (struct sock *)pi; - int count, hlen = L2CAP_HDR_SIZE + 2; - u8 flags; - - if (sk->sk_state != BT_CONNECTED) - return; - - if (pi->fcs == L2CAP_FCS_CRC16) - hlen += 2; - - BT_DBG("pi %p, control 0x%2.2x", pi, control); - - count = min_t(unsigned int, conn->mtu, hlen); - control |= L2CAP_CTRL_FRAME_TYPE; - - if (pi->conn_state & L2CAP_CONN_SEND_FBIT) { - control |= L2CAP_CTRL_FINAL; - pi->conn_state &= ~L2CAP_CONN_SEND_FBIT; - } - - if (pi->conn_state & L2CAP_CONN_SEND_PBIT) { - control |= L2CAP_CTRL_POLL; - pi->conn_state &= ~L2CAP_CONN_SEND_PBIT; - } - - skb = bt_skb_alloc(count, GFP_ATOMIC); - if (!skb) - return; - - lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); - lh->len = cpu_to_le16(hlen - L2CAP_HDR_SIZE); - lh->cid = cpu_to_le16(pi->dcid); - put_unaligned_le16(control, skb_put(skb, 2)); - - if (pi->fcs == L2CAP_FCS_CRC16) { - u16 fcs = crc16(0, (u8 *)lh, count - 2); - 
put_unaligned_le16(fcs, skb_put(skb, 2)); - } - - if (lmp_no_flush_capable(conn->hcon->hdev)) - flags = ACL_START_NO_FLUSH; - else - flags = ACL_START; - - hci_send_acl(pi->conn->hcon, skb, flags); -} - -static inline void l2cap_send_rr_or_rnr(struct l2cap_pinfo *pi, u16 control) -{ - if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) { - control |= L2CAP_SUPER_RCV_NOT_READY; - pi->conn_state |= L2CAP_CONN_RNR_SENT; - } else - control |= L2CAP_SUPER_RCV_READY; - - control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; - - l2cap_send_sframe(pi, control); -} - -static inline int __l2cap_no_conn_pending(struct sock *sk) -{ - return !(l2cap_pi(sk)->conf_state & L2CAP_CONF_CONNECT_PEND); -} - -static void l2cap_do_start(struct sock *sk) -{ - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - - if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) { - if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE)) - return; - - if (l2cap_check_security(sk) && __l2cap_no_conn_pending(sk)) { - struct l2cap_conn_req req; - req.scid = cpu_to_le16(l2cap_pi(sk)->scid); - req.psm = l2cap_pi(sk)->psm; - - l2cap_pi(sk)->ident = l2cap_get_ident(conn); - l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND; - - l2cap_send_cmd(conn, l2cap_pi(sk)->ident, - L2CAP_CONN_REQ, sizeof(req), &req); - } - } else { - struct l2cap_info_req req; - req.type = cpu_to_le16(L2CAP_IT_FEAT_MASK); - - conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT; - conn->info_ident = l2cap_get_ident(conn); - - mod_timer(&conn->info_timer, jiffies + - msecs_to_jiffies(L2CAP_INFO_TIMEOUT)); - - l2cap_send_cmd(conn, conn->info_ident, - L2CAP_INFO_REQ, sizeof(req), &req); - } -} - -static inline int l2cap_mode_supported(__u8 mode, __u32 feat_mask) -{ - u32 local_feat_mask = l2cap_feat_mask; - if (!disable_ertm) - local_feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING; - - switch (mode) { - case L2CAP_MODE_ERTM: - return L2CAP_FEAT_ERTM & feat_mask & local_feat_mask; - case L2CAP_MODE_STREAMING: - return L2CAP_FEAT_STREAMING & 
feat_mask & local_feat_mask; - default: - return 0x00; - } -} - -static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk, int err) -{ - struct l2cap_disconn_req req; - - if (!conn) - return; - - skb_queue_purge(TX_QUEUE(sk)); - - if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) { - del_timer(&l2cap_pi(sk)->retrans_timer); - del_timer(&l2cap_pi(sk)->monitor_timer); - del_timer(&l2cap_pi(sk)->ack_timer); - } - - req.dcid = cpu_to_le16(l2cap_pi(sk)->dcid); - req.scid = cpu_to_le16(l2cap_pi(sk)->scid); - l2cap_send_cmd(conn, l2cap_get_ident(conn), - L2CAP_DISCONN_REQ, sizeof(req), &req); - - sk->sk_state = BT_DISCONN; - sk->sk_err = err; -} - -/* ---- L2CAP connections ---- */ -static void l2cap_conn_start(struct l2cap_conn *conn) -{ - struct l2cap_chan_list *l = &conn->chan_list; - struct sock_del_list del, *tmp1, *tmp2; - struct sock *sk; - - BT_DBG("conn %p", conn); - - INIT_LIST_HEAD(&del.list); - - read_lock(&l->lock); - - for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { - bh_lock_sock(sk); - - if (sk->sk_type != SOCK_SEQPACKET && - sk->sk_type != SOCK_STREAM) { - bh_unlock_sock(sk); - continue; - } - - if (sk->sk_state == BT_CONNECT) { - struct l2cap_conn_req req; - - if (!l2cap_check_security(sk) || - !__l2cap_no_conn_pending(sk)) { - bh_unlock_sock(sk); - continue; - } - - if (!l2cap_mode_supported(l2cap_pi(sk)->mode, - conn->feat_mask) - && l2cap_pi(sk)->conf_state & - L2CAP_CONF_STATE2_DEVICE) { - tmp1 = kzalloc(sizeof(struct sock_del_list), - GFP_ATOMIC); - tmp1->sk = sk; - list_add_tail(&tmp1->list, &del.list); - bh_unlock_sock(sk); - continue; - } - - req.scid = cpu_to_le16(l2cap_pi(sk)->scid); - req.psm = l2cap_pi(sk)->psm; - - l2cap_pi(sk)->ident = l2cap_get_ident(conn); - l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND; - - l2cap_send_cmd(conn, l2cap_pi(sk)->ident, - L2CAP_CONN_REQ, sizeof(req), &req); - - } else if (sk->sk_state == BT_CONNECT2) { - struct l2cap_conn_rsp rsp; - char buf[128]; - rsp.scid = 
cpu_to_le16(l2cap_pi(sk)->dcid); - rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); - - if (l2cap_check_security(sk)) { - if (bt_sk(sk)->defer_setup) { - struct sock *parent = bt_sk(sk)->parent; - rsp.result = cpu_to_le16(L2CAP_CR_PEND); - rsp.status = cpu_to_le16(L2CAP_CS_AUTHOR_PEND); - parent->sk_data_ready(parent, 0); - - } else { - sk->sk_state = BT_CONFIG; - rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS); - rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); - } - } else { - rsp.result = cpu_to_le16(L2CAP_CR_PEND); - rsp.status = cpu_to_le16(L2CAP_CS_AUTHEN_PEND); - } - - l2cap_send_cmd(conn, l2cap_pi(sk)->ident, - L2CAP_CONN_RSP, sizeof(rsp), &rsp); - - if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT || - rsp.result != L2CAP_CR_SUCCESS) { - bh_unlock_sock(sk); - continue; - } - - l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; - l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, - l2cap_build_conf_req(sk, buf), buf); - l2cap_pi(sk)->num_conf_req++; - } - - bh_unlock_sock(sk); - } - - read_unlock(&l->lock); - - list_for_each_entry_safe(tmp1, tmp2, &del.list, list) { - bh_lock_sock(tmp1->sk); - __l2cap_sock_close(tmp1->sk, ECONNRESET); - bh_unlock_sock(tmp1->sk); - list_del(&tmp1->list); - kfree(tmp1); - } -} - -static void l2cap_conn_ready(struct l2cap_conn *conn) -{ - struct l2cap_chan_list *l = &conn->chan_list; - struct sock *sk; - - BT_DBG("conn %p", conn); - - read_lock(&l->lock); - - for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { - bh_lock_sock(sk); - - if (sk->sk_type != SOCK_SEQPACKET && - sk->sk_type != SOCK_STREAM) { - l2cap_sock_clear_timer(sk); - sk->sk_state = BT_CONNECTED; - sk->sk_state_change(sk); - } else if (sk->sk_state == BT_CONNECT) - l2cap_do_start(sk); - - bh_unlock_sock(sk); - } - - read_unlock(&l->lock); -} - -/* Notify sockets that we cannot guaranty reliability anymore */ -static void l2cap_conn_unreliable(struct l2cap_conn *conn, int err) -{ - struct l2cap_chan_list *l = &conn->chan_list; - struct sock *sk; - - BT_DBG("conn %p", 
conn); - - read_lock(&l->lock); - - for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { - if (l2cap_pi(sk)->force_reliable) - sk->sk_err = err; - } - - read_unlock(&l->lock); -} - -static void l2cap_info_timeout(unsigned long arg) -{ - struct l2cap_conn *conn = (void *) arg; - - conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; - conn->info_ident = 0; - - l2cap_conn_start(conn); -} - -static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) -{ - struct l2cap_conn *conn = hcon->l2cap_data; - - if (conn || status) - return conn; - - conn = kzalloc(sizeof(struct l2cap_conn), GFP_ATOMIC); - if (!conn) - return NULL; - - hcon->l2cap_data = conn; - conn->hcon = hcon; - - BT_DBG("hcon %p conn %p", hcon, conn); - - conn->mtu = hcon->hdev->acl_mtu; - conn->src = &hcon->hdev->bdaddr; - conn->dst = &hcon->dst; - - conn->feat_mask = 0; - - spin_lock_init(&conn->lock); - rwlock_init(&conn->chan_list.lock); - - setup_timer(&conn->info_timer, l2cap_info_timeout, - (unsigned long) conn); - - conn->disc_reason = 0x13; - - return conn; -} - -static void l2cap_conn_del(struct hci_conn *hcon, int err) -{ - struct l2cap_conn *conn = hcon->l2cap_data; - struct sock *sk; - - if (!conn) - return; - - BT_DBG("hcon %p conn %p, err %d", hcon, conn, err); - - kfree_skb(conn->rx_skb); - - /* Kill channels */ - while ((sk = conn->chan_list.head)) { - bh_lock_sock(sk); - l2cap_chan_del(sk, err); - bh_unlock_sock(sk); - l2cap_sock_kill(sk); - } - - if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) - del_timer_sync(&conn->info_timer); - - hcon->l2cap_data = NULL; - kfree(conn); -} - -static inline void l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct sock *parent) -{ - struct l2cap_chan_list *l = &conn->chan_list; - write_lock_bh(&l->lock); - __l2cap_chan_add(conn, sk, parent); - write_unlock_bh(&l->lock); -} - -/* ---- Socket interface ---- */ -static struct sock *__l2cap_get_sock_by_addr(__le16 psm, bdaddr_t *src) -{ - struct sock *sk; - struct hlist_node 
*node; - sk_for_each(sk, node, &l2cap_sk_list.head) - if (l2cap_pi(sk)->sport == psm && !bacmp(&bt_sk(sk)->src, src)) - goto found; - sk = NULL; -found: - return sk; -} - -/* Find socket with psm and source bdaddr. - * Returns closest match. - */ -static struct sock *l2cap_get_sock_by_psm(int state, __le16 psm, bdaddr_t *src) -{ - struct sock *sk = NULL, *sk1 = NULL; - struct hlist_node *node; - - read_lock(&l2cap_sk_list.lock); - - sk_for_each(sk, node, &l2cap_sk_list.head) { - if (state && sk->sk_state != state) - continue; - - if (l2cap_pi(sk)->psm == psm) { - /* Exact match. */ - if (!bacmp(&bt_sk(sk)->src, src)) - break; - - /* Closest match */ - if (!bacmp(&bt_sk(sk)->src, BDADDR_ANY)) - sk1 = sk; - } - } - - read_unlock(&l2cap_sk_list.lock); - - return node ? sk : sk1; -} - -static void l2cap_sock_destruct(struct sock *sk) -{ - BT_DBG("sk %p", sk); - - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); -} - -static void l2cap_sock_cleanup_listen(struct sock *parent) -{ - struct sock *sk; - - BT_DBG("parent %p", parent); - - /* Close not yet accepted channels */ - while ((sk = bt_accept_dequeue(parent, NULL))) - l2cap_sock_close(sk); - - parent->sk_state = BT_CLOSED; - sock_set_flag(parent, SOCK_ZAPPED); -} - -/* Kill socket (only if zapped and orphan) - * Must be called on unlocked socket. 
- */ -static void l2cap_sock_kill(struct sock *sk) -{ - if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket) - return; - - BT_DBG("sk %p state %d", sk, sk->sk_state); - - /* Kill poor orphan */ - bt_sock_unlink(&l2cap_sk_list, sk); - sock_set_flag(sk, SOCK_DEAD); - sock_put(sk); -} - -static void __l2cap_sock_close(struct sock *sk, int reason) -{ - BT_DBG("sk %p state %d socket %p", sk, sk->sk_state, sk->sk_socket); - - switch (sk->sk_state) { - case BT_LISTEN: - l2cap_sock_cleanup_listen(sk); - break; - - case BT_CONNECTED: - case BT_CONFIG: - if (sk->sk_type == SOCK_SEQPACKET || - sk->sk_type == SOCK_STREAM) { - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - - l2cap_sock_set_timer(sk, sk->sk_sndtimeo); - l2cap_send_disconn_req(conn, sk, reason); - } else - l2cap_chan_del(sk, reason); - break; - - case BT_CONNECT2: - if (sk->sk_type == SOCK_SEQPACKET || - sk->sk_type == SOCK_STREAM) { - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - struct l2cap_conn_rsp rsp; - __u16 result; - - if (bt_sk(sk)->defer_setup) - result = L2CAP_CR_SEC_BLOCK; - else - result = L2CAP_CR_BAD_PSM; - sk->sk_state = BT_DISCONN; - - rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); - rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); - rsp.result = cpu_to_le16(result); - rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); - l2cap_send_cmd(conn, l2cap_pi(sk)->ident, - L2CAP_CONN_RSP, sizeof(rsp), &rsp); - } else - l2cap_chan_del(sk, reason); - break; - - case BT_CONNECT: - case BT_DISCONN: - l2cap_chan_del(sk, reason); - break; - - default: - sock_set_flag(sk, SOCK_ZAPPED); - break; - } -} - -/* Must be called on unlocked socket. 
*/ -static void l2cap_sock_close(struct sock *sk) -{ - l2cap_sock_clear_timer(sk); - lock_sock(sk); - __l2cap_sock_close(sk, ECONNRESET); - release_sock(sk); - l2cap_sock_kill(sk); -} - -static void l2cap_sock_init(struct sock *sk, struct sock *parent) -{ - struct l2cap_pinfo *pi = l2cap_pi(sk); - - BT_DBG("sk %p", sk); - - if (parent) { - sk->sk_type = parent->sk_type; - bt_sk(sk)->defer_setup = bt_sk(parent)->defer_setup; - - pi->imtu = l2cap_pi(parent)->imtu; - pi->omtu = l2cap_pi(parent)->omtu; - pi->conf_state = l2cap_pi(parent)->conf_state; - pi->mode = l2cap_pi(parent)->mode; - pi->fcs = l2cap_pi(parent)->fcs; - pi->max_tx = l2cap_pi(parent)->max_tx; - pi->tx_win = l2cap_pi(parent)->tx_win; - pi->sec_level = l2cap_pi(parent)->sec_level; - pi->role_switch = l2cap_pi(parent)->role_switch; - pi->force_reliable = l2cap_pi(parent)->force_reliable; - pi->flushable = l2cap_pi(parent)->flushable; - } else { - pi->imtu = L2CAP_DEFAULT_MTU; - pi->omtu = 0; - if (!disable_ertm && sk->sk_type == SOCK_STREAM) { - pi->mode = L2CAP_MODE_ERTM; - pi->conf_state |= L2CAP_CONF_STATE2_DEVICE; - } else { - pi->mode = L2CAP_MODE_BASIC; - } - pi->max_tx = L2CAP_DEFAULT_MAX_TX; - pi->fcs = L2CAP_FCS_CRC16; - pi->tx_win = L2CAP_DEFAULT_TX_WINDOW; - pi->sec_level = BT_SECURITY_LOW; - pi->role_switch = 0; - pi->force_reliable = 0; - pi->flushable = BT_FLUSHABLE_OFF; - } - - /* Default config options */ - pi->conf_len = 0; - pi->flush_to = L2CAP_DEFAULT_FLUSH_TO; - skb_queue_head_init(TX_QUEUE(sk)); - skb_queue_head_init(SREJ_QUEUE(sk)); - skb_queue_head_init(BUSY_QUEUE(sk)); - INIT_LIST_HEAD(SREJ_LIST(sk)); -} - -static struct proto l2cap_proto = { - .name = "L2CAP", - .owner = THIS_MODULE, - .obj_size = sizeof(struct l2cap_pinfo) -}; - -static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) -{ - struct sock *sk; - - sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto); - if (!sk) - return NULL; - - sock_init_data(sock, sk); - 
INIT_LIST_HEAD(&bt_sk(sk)->accept_q); - - sk->sk_destruct = l2cap_sock_destruct; - sk->sk_sndtimeo = msecs_to_jiffies(L2CAP_CONN_TIMEOUT); - - sock_reset_flag(sk, SOCK_ZAPPED); - - sk->sk_protocol = proto; - sk->sk_state = BT_OPEN; - - setup_timer(&sk->sk_timer, l2cap_sock_timeout, (unsigned long) sk); - - bt_sock_link(&l2cap_sk_list, sk); - return sk; -} - -static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol, - int kern) -{ - struct sock *sk; - - BT_DBG("sock %p", sock); - - sock->state = SS_UNCONNECTED; - - if (sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM && - sock->type != SOCK_DGRAM && sock->type != SOCK_RAW) - return -ESOCKTNOSUPPORT; - - if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW)) - return -EPERM; - - sock->ops = &l2cap_sock_ops; - - sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC); - if (!sk) - return -ENOMEM; - - l2cap_sock_init(sk, NULL); - return 0; -} - -static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) -{ - struct sock *sk = sock->sk; - struct sockaddr_l2 la; - int len, err = 0; - - BT_DBG("sk %p", sk); - - if (!addr || addr->sa_family != AF_BLUETOOTH) - return -EINVAL; - - memset(&la, 0, sizeof(la)); - len = min_t(unsigned int, sizeof(la), alen); - memcpy(&la, addr, len); - - if (la.l2_cid) - return -EINVAL; - - lock_sock(sk); - - if (sk->sk_state != BT_OPEN) { - err = -EBADFD; - goto done; - } - - if (la.l2_psm) { - __u16 psm = __le16_to_cpu(la.l2_psm); - - /* PSM must be odd and lsb of upper byte must be 0 */ - if ((psm & 0x0101) != 0x0001) { - err = -EINVAL; - goto done; - } - - /* Restrict usage of well-known PSMs */ - if (psm < 0x1001 && !capable(CAP_NET_BIND_SERVICE)) { - err = -EACCES; - goto done; - } - } - - write_lock_bh(&l2cap_sk_list.lock); - - if (la.l2_psm && __l2cap_get_sock_by_addr(la.l2_psm, &la.l2_bdaddr)) { - err = -EADDRINUSE; - } else { - /* Save source address */ - bacpy(&bt_sk(sk)->src, &la.l2_bdaddr); - l2cap_pi(sk)->psm = la.l2_psm; 
- l2cap_pi(sk)->sport = la.l2_psm; - sk->sk_state = BT_BOUND; - - if (__le16_to_cpu(la.l2_psm) == 0x0001 || - __le16_to_cpu(la.l2_psm) == 0x0003) - l2cap_pi(sk)->sec_level = BT_SECURITY_SDP; - } - - write_unlock_bh(&l2cap_sk_list.lock); - -done: - release_sock(sk); - return err; -} - -static int l2cap_do_connect(struct sock *sk) -{ - bdaddr_t *src = &bt_sk(sk)->src; - bdaddr_t *dst = &bt_sk(sk)->dst; - struct l2cap_conn *conn; - struct hci_conn *hcon; - struct hci_dev *hdev; - __u8 auth_type; - int err; - - BT_DBG("%s -> %s psm 0x%2.2x", batostr(src), batostr(dst), - l2cap_pi(sk)->psm); - - hdev = hci_get_route(dst, src); - if (!hdev) - return -EHOSTUNREACH; - - hci_dev_lock_bh(hdev); - - err = -ENOMEM; - - auth_type = l2cap_get_auth_type(sk); - - hcon = hci_connect(hdev, ACL_LINK, dst, - l2cap_pi(sk)->sec_level, auth_type); - if (!hcon) - goto done; - - conn = l2cap_conn_add(hcon, 0); - if (!conn) { - hci_conn_put(hcon); - goto done; - } - - err = 0; - - /* Update source addr of the socket */ - bacpy(src, conn->src); - - l2cap_chan_add(conn, sk, NULL); - - sk->sk_state = BT_CONNECT; - l2cap_sock_set_timer(sk, sk->sk_sndtimeo); - - if (hcon->state == BT_CONNECTED) { - if (sk->sk_type != SOCK_SEQPACKET && - sk->sk_type != SOCK_STREAM) { - l2cap_sock_clear_timer(sk); - if (l2cap_check_security(sk)) - sk->sk_state = BT_CONNECTED; - } else - l2cap_do_start(sk); - } - -done: - hci_dev_unlock_bh(hdev); - hci_dev_put(hdev); - return err; -} - -static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) -{ - struct sock *sk = sock->sk; - struct sockaddr_l2 la; - int len, err = 0; - - BT_DBG("sk %p", sk); - - if (!addr || alen < sizeof(addr->sa_family) || - addr->sa_family != AF_BLUETOOTH) - return -EINVAL; - - memset(&la, 0, sizeof(la)); - len = min_t(unsigned int, sizeof(la), alen); - memcpy(&la, addr, len); - - if (la.l2_cid) - return -EINVAL; - - lock_sock(sk); - - if ((sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM) - && 
!la.l2_psm) { - err = -EINVAL; - goto done; - } - - switch (l2cap_pi(sk)->mode) { - case L2CAP_MODE_BASIC: - break; - case L2CAP_MODE_ERTM: - case L2CAP_MODE_STREAMING: - if (!disable_ertm) - break; - /* fall through */ - default: - err = -ENOTSUPP; - goto done; - } - - switch (sk->sk_state) { - case BT_CONNECT: - case BT_CONNECT2: - case BT_CONFIG: - /* Already connecting */ - goto wait; - - case BT_CONNECTED: - /* Already connected */ - err = -EISCONN; - goto done; - - case BT_OPEN: - case BT_BOUND: - /* Can connect */ - break; - - default: - err = -EBADFD; - goto done; - } - - /* PSM must be odd and lsb of upper byte must be 0 */ - if ((__le16_to_cpu(la.l2_psm) & 0x0101) != 0x0001 && - sk->sk_type != SOCK_RAW) { - err = -EINVAL; - goto done; - } - - /* Set destination address and psm */ - bacpy(&bt_sk(sk)->dst, &la.l2_bdaddr); - l2cap_pi(sk)->psm = la.l2_psm; - - err = l2cap_do_connect(sk); - if (err) - goto done; - -wait: - err = bt_sock_wait_state(sk, BT_CONNECTED, - sock_sndtimeo(sk, flags & O_NONBLOCK)); -done: - release_sock(sk); - return err; -} - -static int l2cap_sock_listen(struct socket *sock, int backlog) -{ - struct sock *sk = sock->sk; - int err = 0; - - BT_DBG("sk %p backlog %d", sk, backlog); - - lock_sock(sk); - - if ((sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM) - || sk->sk_state != BT_BOUND) { - err = -EBADFD; - goto done; - } - - switch (l2cap_pi(sk)->mode) { - case L2CAP_MODE_BASIC: - break; - case L2CAP_MODE_ERTM: - case L2CAP_MODE_STREAMING: - if (!disable_ertm) - break; - /* fall through */ - default: - err = -ENOTSUPP; - goto done; - } - - if (!l2cap_pi(sk)->psm) { - bdaddr_t *src = &bt_sk(sk)->src; - u16 psm; - - err = -EINVAL; - - write_lock_bh(&l2cap_sk_list.lock); - - for (psm = 0x1001; psm < 0x1100; psm += 2) - if (!__l2cap_get_sock_by_addr(cpu_to_le16(psm), src)) { - l2cap_pi(sk)->psm = cpu_to_le16(psm); - l2cap_pi(sk)->sport = cpu_to_le16(psm); - err = 0; - break; - } - - write_unlock_bh(&l2cap_sk_list.lock); - - if 
(err < 0) - goto done; - } - - sk->sk_max_ack_backlog = backlog; - sk->sk_ack_backlog = 0; - sk->sk_state = BT_LISTEN; - -done: - release_sock(sk); - return err; -} - -static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int flags) -{ - DECLARE_WAITQUEUE(wait, current); - struct sock *sk = sock->sk, *nsk; - long timeo; - int err = 0; - - lock_sock_nested(sk, SINGLE_DEPTH_NESTING); - - if (sk->sk_state != BT_LISTEN) { - err = -EBADFD; - goto done; - } - - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); - - BT_DBG("sk %p timeo %ld", sk, timeo); - - /* Wait for an incoming connection. (wake-one). */ - add_wait_queue_exclusive(sk_sleep(sk), &wait); - while (!(nsk = bt_accept_dequeue(sk, newsock))) { - set_current_state(TASK_INTERRUPTIBLE); - if (!timeo) { - err = -EAGAIN; - break; - } - - release_sock(sk); - timeo = schedule_timeout(timeo); - lock_sock_nested(sk, SINGLE_DEPTH_NESTING); - - if (sk->sk_state != BT_LISTEN) { - err = -EBADFD; - break; - } - - if (signal_pending(current)) { - err = sock_intr_errno(timeo); - break; - } - } - set_current_state(TASK_RUNNING); - remove_wait_queue(sk_sleep(sk), &wait); - - if (err) - goto done; - - newsock->state = SS_CONNECTED; - - BT_DBG("new socket %p", nsk); - -done: - release_sock(sk); - return err; -} - -static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer) -{ - struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr; - struct sock *sk = sock->sk; - - BT_DBG("sock %p, sk %p", sock, sk); - - addr->sa_family = AF_BLUETOOTH; - *len = sizeof(struct sockaddr_l2); - - if (peer) { - la->l2_psm = l2cap_pi(sk)->psm; - bacpy(&la->l2_bdaddr, &bt_sk(sk)->dst); - la->l2_cid = cpu_to_le16(l2cap_pi(sk)->dcid); - } else { - la->l2_psm = l2cap_pi(sk)->sport; - bacpy(&la->l2_bdaddr, &bt_sk(sk)->src); - la->l2_cid = cpu_to_le16(l2cap_pi(sk)->scid); - } - - return 0; -} - -static int __l2cap_wait_ack(struct sock *sk) -{ - DECLARE_WAITQUEUE(wait, current); - int err = 0; - int timeo = 
HZ/5; - - add_wait_queue(sk_sleep(sk), &wait); - while ((l2cap_pi(sk)->unacked_frames > 0 && l2cap_pi(sk)->conn)) { - set_current_state(TASK_INTERRUPTIBLE); - - if (!timeo) - timeo = HZ/5; - - if (signal_pending(current)) { - err = sock_intr_errno(timeo); - break; - } - - release_sock(sk); - timeo = schedule_timeout(timeo); - lock_sock(sk); - - err = sock_error(sk); - if (err) - break; - } - set_current_state(TASK_RUNNING); - remove_wait_queue(sk_sleep(sk), &wait); - return err; -} - -static void l2cap_monitor_timeout(unsigned long arg) -{ - struct sock *sk = (void *) arg; - - BT_DBG("sk %p", sk); - - bh_lock_sock(sk); - if (l2cap_pi(sk)->retry_count >= l2cap_pi(sk)->remote_max_tx) { - l2cap_send_disconn_req(l2cap_pi(sk)->conn, sk, ECONNABORTED); - bh_unlock_sock(sk); - return; - } - - l2cap_pi(sk)->retry_count++; - __mod_monitor_timer(); - - l2cap_send_rr_or_rnr(l2cap_pi(sk), L2CAP_CTRL_POLL); - bh_unlock_sock(sk); -} - -static void l2cap_retrans_timeout(unsigned long arg) -{ - struct sock *sk = (void *) arg; - - BT_DBG("sk %p", sk); - - bh_lock_sock(sk); - l2cap_pi(sk)->retry_count = 1; - __mod_monitor_timer(); - - l2cap_pi(sk)->conn_state |= L2CAP_CONN_WAIT_F; - - l2cap_send_rr_or_rnr(l2cap_pi(sk), L2CAP_CTRL_POLL); - bh_unlock_sock(sk); -} - -static void l2cap_drop_acked_frames(struct sock *sk) -{ - struct sk_buff *skb; - - while ((skb = skb_peek(TX_QUEUE(sk))) && - l2cap_pi(sk)->unacked_frames) { - if (bt_cb(skb)->tx_seq == l2cap_pi(sk)->expected_ack_seq) - break; - - skb = skb_dequeue(TX_QUEUE(sk)); - kfree_skb(skb); - - l2cap_pi(sk)->unacked_frames--; - } - - if (!l2cap_pi(sk)->unacked_frames) - del_timer(&l2cap_pi(sk)->retrans_timer); -} - -static inline void l2cap_do_send(struct sock *sk, struct sk_buff *skb) -{ - struct l2cap_pinfo *pi = l2cap_pi(sk); - struct hci_conn *hcon = pi->conn->hcon; - u16 flags; - - BT_DBG("sk %p, skb %p len %d", sk, skb, skb->len); - - if (!pi->flushable && lmp_no_flush_capable(hcon->hdev)) - flags = ACL_START_NO_FLUSH; - else 
- flags = ACL_START; - - hci_send_acl(hcon, skb, flags); -} - -static void l2cap_streaming_send(struct sock *sk) -{ - struct sk_buff *skb; - struct l2cap_pinfo *pi = l2cap_pi(sk); - u16 control, fcs; - - while ((skb = skb_dequeue(TX_QUEUE(sk)))) { - control = get_unaligned_le16(skb->data + L2CAP_HDR_SIZE); - control |= pi->next_tx_seq << L2CAP_CTRL_TXSEQ_SHIFT; - put_unaligned_le16(control, skb->data + L2CAP_HDR_SIZE); - - if (pi->fcs == L2CAP_FCS_CRC16) { - fcs = crc16(0, (u8 *)skb->data, skb->len - 2); - put_unaligned_le16(fcs, skb->data + skb->len - 2); - } - - l2cap_do_send(sk, skb); - - pi->next_tx_seq = (pi->next_tx_seq + 1) % 64; - } -} - -static void l2cap_retransmit_one_frame(struct sock *sk, u8 tx_seq) -{ - struct l2cap_pinfo *pi = l2cap_pi(sk); - struct sk_buff *skb, *tx_skb; - u16 control, fcs; - - skb = skb_peek(TX_QUEUE(sk)); - if (!skb) - return; - - do { - if (bt_cb(skb)->tx_seq == tx_seq) - break; - - if (skb_queue_is_last(TX_QUEUE(sk), skb)) - return; - - } while ((skb = skb_queue_next(TX_QUEUE(sk), skb))); - - if (pi->remote_max_tx && - bt_cb(skb)->retries == pi->remote_max_tx) { - l2cap_send_disconn_req(pi->conn, sk, ECONNABORTED); - return; - } - - tx_skb = skb_clone(skb, GFP_ATOMIC); - bt_cb(skb)->retries++; - control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE); - - if (pi->conn_state & L2CAP_CONN_SEND_FBIT) { - control |= L2CAP_CTRL_FINAL; - pi->conn_state &= ~L2CAP_CONN_SEND_FBIT; - } - - control |= (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT) - | (tx_seq << L2CAP_CTRL_TXSEQ_SHIFT); - - put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE); - - if (pi->fcs == L2CAP_FCS_CRC16) { - fcs = crc16(0, (u8 *)tx_skb->data, tx_skb->len - 2); - put_unaligned_le16(fcs, tx_skb->data + tx_skb->len - 2); - } - - l2cap_do_send(sk, tx_skb); -} - -static int l2cap_ertm_send(struct sock *sk) -{ - struct sk_buff *skb, *tx_skb; - struct l2cap_pinfo *pi = l2cap_pi(sk); - u16 control, fcs; - int nsent = 0; - - if (sk->sk_state != BT_CONNECTED) - return 
-ENOTCONN; - - while ((skb = sk->sk_send_head) && (!l2cap_tx_window_full(sk))) { - - if (pi->remote_max_tx && - bt_cb(skb)->retries == pi->remote_max_tx) { - l2cap_send_disconn_req(pi->conn, sk, ECONNABORTED); - break; - } - - tx_skb = skb_clone(skb, GFP_ATOMIC); - - bt_cb(skb)->retries++; - - control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE); - control &= L2CAP_CTRL_SAR; - - if (pi->conn_state & L2CAP_CONN_SEND_FBIT) { - control |= L2CAP_CTRL_FINAL; - pi->conn_state &= ~L2CAP_CONN_SEND_FBIT; - } - control |= (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT) - | (pi->next_tx_seq << L2CAP_CTRL_TXSEQ_SHIFT); - put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE); - - - if (pi->fcs == L2CAP_FCS_CRC16) { - fcs = crc16(0, (u8 *)skb->data, tx_skb->len - 2); - put_unaligned_le16(fcs, skb->data + tx_skb->len - 2); - } - - l2cap_do_send(sk, tx_skb); - - __mod_retrans_timer(); - - bt_cb(skb)->tx_seq = pi->next_tx_seq; - pi->next_tx_seq = (pi->next_tx_seq + 1) % 64; - - pi->unacked_frames++; - pi->frames_sent++; - - if (skb_queue_is_last(TX_QUEUE(sk), skb)) - sk->sk_send_head = NULL; - else - sk->sk_send_head = skb_queue_next(TX_QUEUE(sk), skb); - - nsent++; - } - - return nsent; -} - -static int l2cap_retransmit_frames(struct sock *sk) -{ - struct l2cap_pinfo *pi = l2cap_pi(sk); - int ret; - - if (!skb_queue_empty(TX_QUEUE(sk))) - sk->sk_send_head = TX_QUEUE(sk)->next; - - pi->next_tx_seq = pi->expected_ack_seq; - ret = l2cap_ertm_send(sk); - return ret; -} - -static void l2cap_send_ack(struct l2cap_pinfo *pi) -{ - struct sock *sk = (struct sock *)pi; - u16 control = 0; - - control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; - - if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) { - control |= L2CAP_SUPER_RCV_NOT_READY; - pi->conn_state |= L2CAP_CONN_RNR_SENT; - l2cap_send_sframe(pi, control); - return; - } - - if (l2cap_ertm_send(sk) > 0) - return; - - control |= L2CAP_SUPER_RCV_READY; - l2cap_send_sframe(pi, control); -} - -static void l2cap_send_srejtail(struct sock 
*sk) -{ - struct srej_list *tail; - u16 control; - - control = L2CAP_SUPER_SELECT_REJECT; - control |= L2CAP_CTRL_FINAL; - - tail = list_entry(SREJ_LIST(sk)->prev, struct srej_list, list); - control |= tail->tx_seq << L2CAP_CTRL_REQSEQ_SHIFT; - - l2cap_send_sframe(l2cap_pi(sk), control); -} - -static inline int l2cap_skbuff_fromiovec(struct sock *sk, struct msghdr *msg, int len, int count, struct sk_buff *skb) -{ - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - struct sk_buff **frag; - int err, sent = 0; - - if (memcpy_fromiovec(skb_put(skb, count), msg->msg_iov, count)) - return -EFAULT; - - sent += count; - len -= count; - - /* Continuation fragments (no L2CAP header) */ - frag = &skb_shinfo(skb)->frag_list; - while (len) { - count = min_t(unsigned int, conn->mtu, len); - - *frag = bt_skb_send_alloc(sk, count, msg->msg_flags & MSG_DONTWAIT, &err); - if (!*frag) - return err; - if (memcpy_fromiovec(skb_put(*frag, count), msg->msg_iov, count)) - return -EFAULT; - - sent += count; - len -= count; - - frag = &(*frag)->next; - } - - return sent; -} - -static struct sk_buff *l2cap_create_connless_pdu(struct sock *sk, struct msghdr *msg, size_t len) -{ - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - struct sk_buff *skb; - int err, count, hlen = L2CAP_HDR_SIZE + 2; - struct l2cap_hdr *lh; - - BT_DBG("sk %p len %d", sk, (int)len); - - count = min_t(unsigned int, (conn->mtu - hlen), len); - skb = bt_skb_send_alloc(sk, count + hlen, - msg->msg_flags & MSG_DONTWAIT, &err); - if (!skb) - return ERR_PTR(err); - - /* Create L2CAP header */ - lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); - lh->cid = cpu_to_le16(l2cap_pi(sk)->dcid); - lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE)); - put_unaligned_le16(l2cap_pi(sk)->psm, skb_put(skb, 2)); - - err = l2cap_skbuff_fromiovec(sk, msg, len, count, skb); - if (unlikely(err < 0)) { - kfree_skb(skb); - return ERR_PTR(err); - } - return skb; -} - -static struct sk_buff *l2cap_create_basic_pdu(struct sock *sk, struct 
msghdr *msg, size_t len) -{ - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - struct sk_buff *skb; - int err, count, hlen = L2CAP_HDR_SIZE; - struct l2cap_hdr *lh; - - BT_DBG("sk %p len %d", sk, (int)len); - - count = min_t(unsigned int, (conn->mtu - hlen), len); - skb = bt_skb_send_alloc(sk, count + hlen, - msg->msg_flags & MSG_DONTWAIT, &err); - if (!skb) - return ERR_PTR(err); - - /* Create L2CAP header */ - lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); - lh->cid = cpu_to_le16(l2cap_pi(sk)->dcid); - lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE)); - - err = l2cap_skbuff_fromiovec(sk, msg, len, count, skb); - if (unlikely(err < 0)) { - kfree_skb(skb); - return ERR_PTR(err); - } - return skb; -} - -static struct sk_buff *l2cap_create_iframe_pdu(struct sock *sk, struct msghdr *msg, size_t len, u16 control, u16 sdulen) -{ - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - struct sk_buff *skb; - int err, count, hlen = L2CAP_HDR_SIZE + 2; - struct l2cap_hdr *lh; - - BT_DBG("sk %p len %d", sk, (int)len); - - if (!conn) - return ERR_PTR(-ENOTCONN); - - if (sdulen) - hlen += 2; - - if (l2cap_pi(sk)->fcs == L2CAP_FCS_CRC16) - hlen += 2; - - count = min_t(unsigned int, (conn->mtu - hlen), len); - skb = bt_skb_send_alloc(sk, count + hlen, - msg->msg_flags & MSG_DONTWAIT, &err); - if (!skb) - return ERR_PTR(err); - - /* Create L2CAP header */ - lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); - lh->cid = cpu_to_le16(l2cap_pi(sk)->dcid); - lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE)); - put_unaligned_le16(control, skb_put(skb, 2)); - if (sdulen) - put_unaligned_le16(sdulen, skb_put(skb, 2)); - - err = l2cap_skbuff_fromiovec(sk, msg, len, count, skb); - if (unlikely(err < 0)) { - kfree_skb(skb); - return ERR_PTR(err); - } - - if (l2cap_pi(sk)->fcs == L2CAP_FCS_CRC16) - put_unaligned_le16(0, skb_put(skb, 2)); - - bt_cb(skb)->retries = 0; - return skb; -} - -static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, size_t len) -{ - 
struct l2cap_pinfo *pi = l2cap_pi(sk); - struct sk_buff *skb; - struct sk_buff_head sar_queue; - u16 control; - size_t size = 0; - - skb_queue_head_init(&sar_queue); - control = L2CAP_SDU_START; - skb = l2cap_create_iframe_pdu(sk, msg, pi->remote_mps, control, len); - if (IS_ERR(skb)) - return PTR_ERR(skb); - - __skb_queue_tail(&sar_queue, skb); - len -= pi->remote_mps; - size += pi->remote_mps; - - while (len > 0) { - size_t buflen; - - if (len > pi->remote_mps) { - control = L2CAP_SDU_CONTINUE; - buflen = pi->remote_mps; - } else { - control = L2CAP_SDU_END; - buflen = len; - } - - skb = l2cap_create_iframe_pdu(sk, msg, buflen, control, 0); - if (IS_ERR(skb)) { - skb_queue_purge(&sar_queue); - return PTR_ERR(skb); - } - - __skb_queue_tail(&sar_queue, skb); - len -= buflen; - size += buflen; - } - skb_queue_splice_tail(&sar_queue, TX_QUEUE(sk)); - if (sk->sk_send_head == NULL) - sk->sk_send_head = sar_queue.next; - - return size; -} - -static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) -{ - struct sock *sk = sock->sk; - struct l2cap_pinfo *pi = l2cap_pi(sk); - struct sk_buff *skb; - u16 control; - int err; - - BT_DBG("sock %p, sk %p", sock, sk); - - err = sock_error(sk); - if (err) - return err; - - if (msg->msg_flags & MSG_OOB) - return -EOPNOTSUPP; - - lock_sock(sk); - - if (sk->sk_state != BT_CONNECTED) { - err = -ENOTCONN; - goto done; - } - - /* Connectionless channel */ - if (sk->sk_type == SOCK_DGRAM) { - skb = l2cap_create_connless_pdu(sk, msg, len); - if (IS_ERR(skb)) { - err = PTR_ERR(skb); - } else { - l2cap_do_send(sk, skb); - err = len; - } - goto done; - } - - switch (pi->mode) { - case L2CAP_MODE_BASIC: - /* Check outgoing MTU */ - if (len > pi->omtu) { - err = -EMSGSIZE; - goto done; - } - - /* Create a basic PDU */ - skb = l2cap_create_basic_pdu(sk, msg, len); - if (IS_ERR(skb)) { - err = PTR_ERR(skb); - goto done; - } - - l2cap_do_send(sk, skb); - err = len; - break; - - case L2CAP_MODE_ERTM: - 
case L2CAP_MODE_STREAMING: - /* Entire SDU fits into one PDU */ - if (len <= pi->remote_mps) { - control = L2CAP_SDU_UNSEGMENTED; - skb = l2cap_create_iframe_pdu(sk, msg, len, control, 0); - if (IS_ERR(skb)) { - err = PTR_ERR(skb); - goto done; - } - __skb_queue_tail(TX_QUEUE(sk), skb); - - if (sk->sk_send_head == NULL) - sk->sk_send_head = skb; - - } else { - /* Segment SDU into multiples PDUs */ - err = l2cap_sar_segment_sdu(sk, msg, len); - if (err < 0) - goto done; - } - - if (pi->mode == L2CAP_MODE_STREAMING) { - l2cap_streaming_send(sk); - } else { - if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) && - (pi->conn_state & L2CAP_CONN_WAIT_F)) { - err = len; - break; - } - err = l2cap_ertm_send(sk); - } - - if (err >= 0) - err = len; - break; - - default: - BT_DBG("bad state %1.1x", pi->mode); - err = -EBADFD; - } - -done: - release_sock(sk); - return err; -} - -static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags) -{ - struct sock *sk = sock->sk; - - lock_sock(sk); - - if (sk->sk_state == BT_CONNECT2 && bt_sk(sk)->defer_setup) { - struct l2cap_conn_rsp rsp; - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - u8 buf[128]; - - sk->sk_state = BT_CONFIG; - - rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); - rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); - rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS); - rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); - l2cap_send_cmd(l2cap_pi(sk)->conn, l2cap_pi(sk)->ident, - L2CAP_CONN_RSP, sizeof(rsp), &rsp); - - if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) { - release_sock(sk); - return 0; - } - - l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; - l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, - l2cap_build_conf_req(sk, buf), buf); - l2cap_pi(sk)->num_conf_req++; - - release_sock(sk); - return 0; - } - - release_sock(sk); - - if (sock->type == SOCK_STREAM) - return bt_sock_stream_recvmsg(iocb, sock, msg, len, flags); - - return bt_sock_recvmsg(iocb, sock, msg, len, 
flags); -} - -static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, unsigned int optlen) -{ - struct sock *sk = sock->sk; - struct l2cap_options opts; - int len, err = 0; - u32 opt; - - BT_DBG("sk %p", sk); - - lock_sock(sk); - - switch (optname) { - case L2CAP_OPTIONS: - if (sk->sk_state == BT_CONNECTED) { - err = -EINVAL; - break; - } - - opts.imtu = l2cap_pi(sk)->imtu; - opts.omtu = l2cap_pi(sk)->omtu; - opts.flush_to = l2cap_pi(sk)->flush_to; - opts.mode = l2cap_pi(sk)->mode; - opts.fcs = l2cap_pi(sk)->fcs; - opts.max_tx = l2cap_pi(sk)->max_tx; - opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win; - - len = min_t(unsigned int, sizeof(opts), optlen); - if (copy_from_user((char *) &opts, optval, len)) { - err = -EFAULT; - break; - } - - if (opts.txwin_size > L2CAP_DEFAULT_TX_WINDOW) { - err = -EINVAL; - break; - } - - l2cap_pi(sk)->mode = opts.mode; - switch (l2cap_pi(sk)->mode) { - case L2CAP_MODE_BASIC: - l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_STATE2_DEVICE; - break; - case L2CAP_MODE_ERTM: - case L2CAP_MODE_STREAMING: - if (!disable_ertm) - break; - /* fall through */ - default: - err = -EINVAL; - break; - } - - l2cap_pi(sk)->imtu = opts.imtu; - l2cap_pi(sk)->omtu = opts.omtu; - l2cap_pi(sk)->fcs = opts.fcs; - l2cap_pi(sk)->max_tx = opts.max_tx; - l2cap_pi(sk)->tx_win = (__u8)opts.txwin_size; - break; - - case L2CAP_LM: - if (get_user(opt, (u32 __user *) optval)) { - err = -EFAULT; - break; - } - - if (opt & L2CAP_LM_AUTH) - l2cap_pi(sk)->sec_level = BT_SECURITY_LOW; - if (opt & L2CAP_LM_ENCRYPT) - l2cap_pi(sk)->sec_level = BT_SECURITY_MEDIUM; - if (opt & L2CAP_LM_SECURE) - l2cap_pi(sk)->sec_level = BT_SECURITY_HIGH; - - l2cap_pi(sk)->role_switch = (opt & L2CAP_LM_MASTER); - l2cap_pi(sk)->force_reliable = (opt & L2CAP_LM_RELIABLE); - break; - - default: - err = -ENOPROTOOPT; - break; - } - - release_sock(sk); - return err; -} - -static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, 
unsigned int optlen) -{ - struct sock *sk = sock->sk; - struct bt_security sec; - int len, err = 0; - u32 opt; - - BT_DBG("sk %p", sk); - - if (level == SOL_L2CAP) - return l2cap_sock_setsockopt_old(sock, optname, optval, optlen); - - if (level != SOL_BLUETOOTH) - return -ENOPROTOOPT; - - lock_sock(sk); - - switch (optname) { - case BT_SECURITY: - if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM - && sk->sk_type != SOCK_RAW) { - err = -EINVAL; - break; - } - - sec.level = BT_SECURITY_LOW; - - len = min_t(unsigned int, sizeof(sec), optlen); - if (copy_from_user((char *) &sec, optval, len)) { - err = -EFAULT; - break; - } - - if (sec.level < BT_SECURITY_LOW || - sec.level > BT_SECURITY_HIGH) { - err = -EINVAL; - break; - } - - l2cap_pi(sk)->sec_level = sec.level; - break; - - case BT_DEFER_SETUP: - if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { - err = -EINVAL; - break; - } - - if (get_user(opt, (u32 __user *) optval)) { - err = -EFAULT; - break; - } - - bt_sk(sk)->defer_setup = opt; - break; - - case BT_FLUSHABLE: - if (get_user(opt, (u32 __user *) optval)) { - err = -EFAULT; - break; - } - - if (opt > BT_FLUSHABLE_ON) { - err = -EINVAL; - break; - } - - if (opt == BT_FLUSHABLE_OFF) { - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - /* proceed futher only when we have l2cap_conn and - No Flush support in the LM */ - if (!conn || !lmp_no_flush_capable(conn->hcon->hdev)) { - err = -EINVAL; - break; - } - } - - l2cap_pi(sk)->flushable = opt; - break; - - default: - err = -ENOPROTOOPT; - break; - } - - release_sock(sk); - return err; -} - -static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen) -{ - struct sock *sk = sock->sk; - struct l2cap_options opts; - struct l2cap_conninfo cinfo; - int len, err = 0; - u32 opt; - - BT_DBG("sk %p", sk); - - if (get_user(len, optlen)) - return -EFAULT; - - lock_sock(sk); - - switch (optname) { - case L2CAP_OPTIONS: - opts.imtu = l2cap_pi(sk)->imtu; 
- opts.omtu = l2cap_pi(sk)->omtu; - opts.flush_to = l2cap_pi(sk)->flush_to; - opts.mode = l2cap_pi(sk)->mode; - opts.fcs = l2cap_pi(sk)->fcs; - opts.max_tx = l2cap_pi(sk)->max_tx; - opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win; - - len = min_t(unsigned int, len, sizeof(opts)); - if (copy_to_user(optval, (char *) &opts, len)) - err = -EFAULT; - - break; - - case L2CAP_LM: - switch (l2cap_pi(sk)->sec_level) { - case BT_SECURITY_LOW: - opt = L2CAP_LM_AUTH; - break; - case BT_SECURITY_MEDIUM: - opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT; - break; - case BT_SECURITY_HIGH: - opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT | - L2CAP_LM_SECURE; - break; - default: - opt = 0; - break; - } - - if (l2cap_pi(sk)->role_switch) - opt |= L2CAP_LM_MASTER; - - if (l2cap_pi(sk)->force_reliable) - opt |= L2CAP_LM_RELIABLE; - - if (put_user(opt, (u32 __user *) optval)) - err = -EFAULT; - break; - - case L2CAP_CONNINFO: - if (sk->sk_state != BT_CONNECTED && - !(sk->sk_state == BT_CONNECT2 && - bt_sk(sk)->defer_setup)) { - err = -ENOTCONN; - break; - } - - cinfo.hci_handle = l2cap_pi(sk)->conn->hcon->handle; - memcpy(cinfo.dev_class, l2cap_pi(sk)->conn->hcon->dev_class, 3); - - len = min_t(unsigned int, len, sizeof(cinfo)); - if (copy_to_user(optval, (char *) &cinfo, len)) - err = -EFAULT; - - break; - - default: - err = -ENOPROTOOPT; - break; - } - - release_sock(sk); - return err; -} - -static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) -{ - struct sock *sk = sock->sk; - struct bt_security sec; - int len, err = 0; - - BT_DBG("sk %p", sk); - - if (level == SOL_L2CAP) - return l2cap_sock_getsockopt_old(sock, optname, optval, optlen); - - if (level != SOL_BLUETOOTH) - return -ENOPROTOOPT; - - if (get_user(len, optlen)) - return -EFAULT; - - lock_sock(sk); - - switch (optname) { - case BT_SECURITY: - if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM - && sk->sk_type != SOCK_RAW) { - err = -EINVAL; - break; - } - - 
sec.level = l2cap_pi(sk)->sec_level; - - len = min_t(unsigned int, len, sizeof(sec)); - if (copy_to_user(optval, (char *) &sec, len)) - err = -EFAULT; - - break; - - case BT_DEFER_SETUP: - if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { - err = -EINVAL; - break; - } - - if (put_user(bt_sk(sk)->defer_setup, (u32 __user *) optval)) - err = -EFAULT; - - break; - - case BT_FLUSHABLE: - if (put_user(l2cap_pi(sk)->flushable, (u32 __user *) optval)) - err = -EFAULT; - - break; - - default: - err = -ENOPROTOOPT; - break; - } - - release_sock(sk); - return err; -} - -static int l2cap_sock_shutdown(struct socket *sock, int how) -{ - struct sock *sk = sock->sk; - int err = 0; - - BT_DBG("sock %p, sk %p", sock, sk); - - if (!sk) - return 0; - - lock_sock(sk); - if (!sk->sk_shutdown) { - if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) - err = __l2cap_wait_ack(sk); - - sk->sk_shutdown = SHUTDOWN_MASK; - l2cap_sock_clear_timer(sk); - __l2cap_sock_close(sk, 0); - - if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) - err = bt_sock_wait_state(sk, BT_CLOSED, - sk->sk_lingertime); - } - - if (!err && sk->sk_err) - err = -sk->sk_err; - - release_sock(sk); - return err; -} - -static int l2cap_sock_release(struct socket *sock) -{ - struct sock *sk = sock->sk; - int err; - - BT_DBG("sock %p, sk %p", sock, sk); - - if (!sk) - return 0; - - err = l2cap_sock_shutdown(sock, 2); - - sock_orphan(sk); - l2cap_sock_kill(sk); - return err; -} - -static void l2cap_chan_ready(struct sock *sk) -{ - struct sock *parent = bt_sk(sk)->parent; - - BT_DBG("sk %p, parent %p", sk, parent); - - l2cap_pi(sk)->conf_state = 0; - l2cap_sock_clear_timer(sk); - - if (!parent) { - /* Outgoing channel. - * Wake up socket sleeping on connect. - */ - sk->sk_state = BT_CONNECTED; - sk->sk_state_change(sk); - } else { - /* Incoming channel. - * Wake up socket sleeping on accept. 
- */ - parent->sk_data_ready(parent, 0); - } -} - -/* Copy frame to all raw sockets on that connection */ -static void l2cap_raw_recv(struct l2cap_conn *conn, struct sk_buff *skb) -{ - struct l2cap_chan_list *l = &conn->chan_list; - struct sk_buff *nskb; - struct sock *sk; - - BT_DBG("conn %p", conn); - - read_lock(&l->lock); - for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { - if (sk->sk_type != SOCK_RAW) - continue; - - /* Don't send frame to the socket it came from */ - if (skb->sk == sk) - continue; - nskb = skb_clone(skb, GFP_ATOMIC); - if (!nskb) - continue; - - if (sock_queue_rcv_skb(sk, nskb)) - kfree_skb(nskb); - } - read_unlock(&l->lock); -} - -/* ---- L2CAP signalling commands ---- */ -static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, - u8 code, u8 ident, u16 dlen, void *data) -{ - struct sk_buff *skb, **frag; - struct l2cap_cmd_hdr *cmd; - struct l2cap_hdr *lh; - int len, count; - - BT_DBG("conn %p, code 0x%2.2x, ident 0x%2.2x, len %d", - conn, code, ident, dlen); - - len = L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE + dlen; - count = min_t(unsigned int, conn->mtu, len); - - skb = bt_skb_alloc(count, GFP_ATOMIC); - if (!skb) - return NULL; - - lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); - lh->len = cpu_to_le16(L2CAP_CMD_HDR_SIZE + dlen); - lh->cid = cpu_to_le16(L2CAP_CID_SIGNALING); - - cmd = (struct l2cap_cmd_hdr *) skb_put(skb, L2CAP_CMD_HDR_SIZE); - cmd->code = code; - cmd->ident = ident; - cmd->len = cpu_to_le16(dlen); - - if (dlen) { - count -= L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE; - memcpy(skb_put(skb, count), data, count); - data += count; - } - - len -= skb->len; - - /* Continuation fragments (no L2CAP header) */ - frag = &skb_shinfo(skb)->frag_list; - while (len) { - count = min_t(unsigned int, conn->mtu, len); - - *frag = bt_skb_alloc(count, GFP_ATOMIC); - if (!*frag) - goto fail; - - memcpy(skb_put(*frag, count), data, count); - - len -= count; - data += count; - - frag = &(*frag)->next; - } - - return skb; - -fail: - 
kfree_skb(skb); - return NULL; -} - -static inline int l2cap_get_conf_opt(void **ptr, int *type, int *olen, unsigned long *val) -{ - struct l2cap_conf_opt *opt = *ptr; - int len; - - len = L2CAP_CONF_OPT_SIZE + opt->len; - *ptr += len; - - *type = opt->type; - *olen = opt->len; - - switch (opt->len) { - case 1: - *val = *((u8 *) opt->val); - break; - - case 2: - *val = get_unaligned_le16(opt->val); - break; - - case 4: - *val = get_unaligned_le32(opt->val); - break; - - default: - *val = (unsigned long) opt->val; - break; - } - - BT_DBG("type 0x%2.2x len %d val 0x%lx", *type, opt->len, *val); - return len; -} - -static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val) -{ - struct l2cap_conf_opt *opt = *ptr; - - BT_DBG("type 0x%2.2x len %d val 0x%lx", type, len, val); - - opt->type = type; - opt->len = len; - - switch (len) { - case 1: - *((u8 *) opt->val) = val; - break; - - case 2: - put_unaligned_le16(val, opt->val); - break; - - case 4: - put_unaligned_le32(val, opt->val); - break; - - default: - memcpy(opt->val, (void *) val, len); - break; - } - - *ptr += L2CAP_CONF_OPT_SIZE + len; -} - -static void l2cap_ack_timeout(unsigned long arg) -{ - struct sock *sk = (void *) arg; - - bh_lock_sock(sk); - l2cap_send_ack(l2cap_pi(sk)); - bh_unlock_sock(sk); -} - -static inline void l2cap_ertm_init(struct sock *sk) -{ - l2cap_pi(sk)->expected_ack_seq = 0; - l2cap_pi(sk)->unacked_frames = 0; - l2cap_pi(sk)->buffer_seq = 0; - l2cap_pi(sk)->num_acked = 0; - l2cap_pi(sk)->frames_sent = 0; - - setup_timer(&l2cap_pi(sk)->retrans_timer, - l2cap_retrans_timeout, (unsigned long) sk); - setup_timer(&l2cap_pi(sk)->monitor_timer, - l2cap_monitor_timeout, (unsigned long) sk); - setup_timer(&l2cap_pi(sk)->ack_timer, - l2cap_ack_timeout, (unsigned long) sk); - - __skb_queue_head_init(SREJ_QUEUE(sk)); - __skb_queue_head_init(BUSY_QUEUE(sk)); - - INIT_WORK(&l2cap_pi(sk)->busy_work, l2cap_busy_work); - - sk->sk_backlog_rcv = l2cap_ertm_data_rcv; -} - -static inline 
__u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask) -{ - switch (mode) { - case L2CAP_MODE_STREAMING: - case L2CAP_MODE_ERTM: - if (l2cap_mode_supported(mode, remote_feat_mask)) - return mode; - /* fall through */ - default: - return L2CAP_MODE_BASIC; - } -} - -static int l2cap_build_conf_req(struct sock *sk, void *data) -{ - struct l2cap_pinfo *pi = l2cap_pi(sk); - struct l2cap_conf_req *req = data; - struct l2cap_conf_rfc rfc = { .mode = pi->mode }; - void *ptr = req->data; - - BT_DBG("sk %p", sk); - - if (pi->num_conf_req || pi->num_conf_rsp) - goto done; - - switch (pi->mode) { - case L2CAP_MODE_STREAMING: - case L2CAP_MODE_ERTM: - if (pi->conf_state & L2CAP_CONF_STATE2_DEVICE) - break; - - /* fall through */ - default: - pi->mode = l2cap_select_mode(rfc.mode, pi->conn->feat_mask); - break; - } - -done: - if (pi->imtu != L2CAP_DEFAULT_MTU) - l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->imtu); - - switch (pi->mode) { - case L2CAP_MODE_BASIC: - if (!(pi->conn->feat_mask & L2CAP_FEAT_ERTM) && - !(pi->conn->feat_mask & L2CAP_FEAT_STREAMING)) - break; - - rfc.mode = L2CAP_MODE_BASIC; - rfc.txwin_size = 0; - rfc.max_transmit = 0; - rfc.retrans_timeout = 0; - rfc.monitor_timeout = 0; - rfc.max_pdu_size = 0; - - l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), - (unsigned long) &rfc); - break; - - case L2CAP_MODE_ERTM: - rfc.mode = L2CAP_MODE_ERTM; - rfc.txwin_size = pi->tx_win; - rfc.max_transmit = pi->max_tx; - rfc.retrans_timeout = 0; - rfc.monitor_timeout = 0; - rfc.max_pdu_size = cpu_to_le16(L2CAP_DEFAULT_MAX_PDU_SIZE); - if (L2CAP_DEFAULT_MAX_PDU_SIZE > pi->conn->mtu - 10) - rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10); - - l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), - (unsigned long) &rfc); - - if (!(pi->conn->feat_mask & L2CAP_FEAT_FCS)) - break; - - if (pi->fcs == L2CAP_FCS_NONE || - pi->conf_state & L2CAP_CONF_NO_FCS_RECV) { - pi->fcs = L2CAP_FCS_NONE; - l2cap_add_conf_opt(&ptr, L2CAP_CONF_FCS, 1, pi->fcs); - } - break; - - case 
L2CAP_MODE_STREAMING: - rfc.mode = L2CAP_MODE_STREAMING; - rfc.txwin_size = 0; - rfc.max_transmit = 0; - rfc.retrans_timeout = 0; - rfc.monitor_timeout = 0; - rfc.max_pdu_size = cpu_to_le16(L2CAP_DEFAULT_MAX_PDU_SIZE); - if (L2CAP_DEFAULT_MAX_PDU_SIZE > pi->conn->mtu - 10) - rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10); - - l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), - (unsigned long) &rfc); - - if (!(pi->conn->feat_mask & L2CAP_FEAT_FCS)) - break; - - if (pi->fcs == L2CAP_FCS_NONE || - pi->conf_state & L2CAP_CONF_NO_FCS_RECV) { - pi->fcs = L2CAP_FCS_NONE; - l2cap_add_conf_opt(&ptr, L2CAP_CONF_FCS, 1, pi->fcs); - } - break; - } - - /* FIXME: Need actual value of the flush timeout */ - //if (flush_to != L2CAP_DEFAULT_FLUSH_TO) - // l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2, pi->flush_to); - - req->dcid = cpu_to_le16(pi->dcid); - req->flags = cpu_to_le16(0); - - return ptr - data; -} - -static int l2cap_parse_conf_req(struct sock *sk, void *data) -{ - struct l2cap_pinfo *pi = l2cap_pi(sk); - struct l2cap_conf_rsp *rsp = data; - void *ptr = rsp->data; - void *req = pi->conf_req; - int len = pi->conf_len; - int type, hint, olen; - unsigned long val; - struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC }; - u16 mtu = L2CAP_DEFAULT_MTU; - u16 result = L2CAP_CONF_SUCCESS; - - BT_DBG("sk %p", sk); - - while (len >= L2CAP_CONF_OPT_SIZE) { - len -= l2cap_get_conf_opt(&req, &type, &olen, &val); - - hint = type & L2CAP_CONF_HINT; - type &= L2CAP_CONF_MASK; - - switch (type) { - case L2CAP_CONF_MTU: - mtu = val; - break; - - case L2CAP_CONF_FLUSH_TO: - pi->flush_to = val; - break; - - case L2CAP_CONF_QOS: - break; - - case L2CAP_CONF_RFC: - if (olen == sizeof(rfc)) - memcpy(&rfc, (void *) val, olen); - break; - - case L2CAP_CONF_FCS: - if (val == L2CAP_FCS_NONE) - pi->conf_state |= L2CAP_CONF_NO_FCS_RECV; - - break; - - default: - if (hint) - break; - - result = L2CAP_CONF_UNKNOWN; - *((u8 *) ptr++) = type; - break; - } - } - - if (pi->num_conf_rsp || 
pi->num_conf_req > 1) - goto done; - - switch (pi->mode) { - case L2CAP_MODE_STREAMING: - case L2CAP_MODE_ERTM: - if (!(pi->conf_state & L2CAP_CONF_STATE2_DEVICE)) { - pi->mode = l2cap_select_mode(rfc.mode, - pi->conn->feat_mask); - break; - } - - if (pi->mode != rfc.mode) - return -ECONNREFUSED; - - break; - } - -done: - if (pi->mode != rfc.mode) { - result = L2CAP_CONF_UNACCEPT; - rfc.mode = pi->mode; - - if (pi->num_conf_rsp == 1) - return -ECONNREFUSED; - - l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, - sizeof(rfc), (unsigned long) &rfc); - } - - - if (result == L2CAP_CONF_SUCCESS) { - /* Configure output options and let the other side know - * which ones we don't like. */ - - if (mtu < L2CAP_DEFAULT_MIN_MTU) - result = L2CAP_CONF_UNACCEPT; - else { - pi->omtu = mtu; - pi->conf_state |= L2CAP_CONF_MTU_DONE; - } - l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->omtu); - - switch (rfc.mode) { - case L2CAP_MODE_BASIC: - pi->fcs = L2CAP_FCS_NONE; - pi->conf_state |= L2CAP_CONF_MODE_DONE; - break; - - case L2CAP_MODE_ERTM: - pi->remote_tx_win = rfc.txwin_size; - pi->remote_max_tx = rfc.max_transmit; - - if (le16_to_cpu(rfc.max_pdu_size) > pi->conn->mtu - 10) - rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10); - - pi->remote_mps = le16_to_cpu(rfc.max_pdu_size); - - rfc.retrans_timeout = - le16_to_cpu(L2CAP_DEFAULT_RETRANS_TO); - rfc.monitor_timeout = - le16_to_cpu(L2CAP_DEFAULT_MONITOR_TO); - - pi->conf_state |= L2CAP_CONF_MODE_DONE; - - l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, - sizeof(rfc), (unsigned long) &rfc); - - break; - - case L2CAP_MODE_STREAMING: - if (le16_to_cpu(rfc.max_pdu_size) > pi->conn->mtu - 10) - rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10); - - pi->remote_mps = le16_to_cpu(rfc.max_pdu_size); - - pi->conf_state |= L2CAP_CONF_MODE_DONE; - - l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, - sizeof(rfc), (unsigned long) &rfc); - - break; - - default: - result = L2CAP_CONF_UNACCEPT; - - memset(&rfc, 0, sizeof(rfc)); - rfc.mode = pi->mode; - } - - if 
(result == L2CAP_CONF_SUCCESS) - pi->conf_state |= L2CAP_CONF_OUTPUT_DONE; - } - rsp->scid = cpu_to_le16(pi->dcid); - rsp->result = cpu_to_le16(result); - rsp->flags = cpu_to_le16(0x0000); - - return ptr - data; -} - -static int l2cap_parse_conf_rsp(struct sock *sk, void *rsp, int len, void *data, u16 *result) -{ - struct l2cap_pinfo *pi = l2cap_pi(sk); - struct l2cap_conf_req *req = data; - void *ptr = req->data; - int type, olen; - unsigned long val; - struct l2cap_conf_rfc rfc; - - BT_DBG("sk %p, rsp %p, len %d, req %p", sk, rsp, len, data); - - while (len >= L2CAP_CONF_OPT_SIZE) { - len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val); - - switch (type) { - case L2CAP_CONF_MTU: - if (val < L2CAP_DEFAULT_MIN_MTU) { - *result = L2CAP_CONF_UNACCEPT; - pi->imtu = L2CAP_DEFAULT_MIN_MTU; - } else - pi->imtu = val; - l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->imtu); - break; - - case L2CAP_CONF_FLUSH_TO: - pi->flush_to = val; - l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, - 2, pi->flush_to); - break; - - case L2CAP_CONF_RFC: - if (olen == sizeof(rfc)) - memcpy(&rfc, (void *)val, olen); - - if ((pi->conf_state & L2CAP_CONF_STATE2_DEVICE) && - rfc.mode != pi->mode) - return -ECONNREFUSED; - - pi->fcs = 0; - - l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, - sizeof(rfc), (unsigned long) &rfc); - break; - } - } - - if (pi->mode == L2CAP_MODE_BASIC && pi->mode != rfc.mode) - return -ECONNREFUSED; - - pi->mode = rfc.mode; - - if (*result == L2CAP_CONF_SUCCESS) { - switch (rfc.mode) { - case L2CAP_MODE_ERTM: - pi->retrans_timeout = le16_to_cpu(rfc.retrans_timeout); - pi->monitor_timeout = le16_to_cpu(rfc.monitor_timeout); - pi->mps = le16_to_cpu(rfc.max_pdu_size); - break; - case L2CAP_MODE_STREAMING: - pi->mps = le16_to_cpu(rfc.max_pdu_size); - } - } - - req->dcid = cpu_to_le16(pi->dcid); - req->flags = cpu_to_le16(0x0000); - - return ptr - data; -} - -static int l2cap_build_conf_rsp(struct sock *sk, void *data, u16 result, u16 flags) -{ - struct l2cap_conf_rsp *rsp = data; 
- void *ptr = rsp->data; - - BT_DBG("sk %p", sk); - - rsp->scid = cpu_to_le16(l2cap_pi(sk)->dcid); - rsp->result = cpu_to_le16(result); - rsp->flags = cpu_to_le16(flags); - - return ptr - data; -} - -static void l2cap_conf_rfc_get(struct sock *sk, void *rsp, int len) -{ - struct l2cap_pinfo *pi = l2cap_pi(sk); - int type, olen; - unsigned long val; - struct l2cap_conf_rfc rfc; - - BT_DBG("sk %p, rsp %p, len %d", sk, rsp, len); - - if ((pi->mode != L2CAP_MODE_ERTM) && (pi->mode != L2CAP_MODE_STREAMING)) - return; - - while (len >= L2CAP_CONF_OPT_SIZE) { - len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val); - - switch (type) { - case L2CAP_CONF_RFC: - if (olen == sizeof(rfc)) - memcpy(&rfc, (void *)val, olen); - goto done; - } - } - -done: - switch (rfc.mode) { - case L2CAP_MODE_ERTM: - pi->retrans_timeout = le16_to_cpu(rfc.retrans_timeout); - pi->monitor_timeout = le16_to_cpu(rfc.monitor_timeout); - pi->mps = le16_to_cpu(rfc.max_pdu_size); - break; - case L2CAP_MODE_STREAMING: - pi->mps = le16_to_cpu(rfc.max_pdu_size); - } -} - -static inline int l2cap_command_rej(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data) -{ - struct l2cap_cmd_rej *rej = (struct l2cap_cmd_rej *) data; - - if (rej->reason != 0x0000) - return 0; - - if ((conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) && - cmd->ident == conn->info_ident) { - del_timer(&conn->info_timer); - - conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; - conn->info_ident = 0; - - l2cap_conn_start(conn); - } - - return 0; -} - -static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data) -{ - struct l2cap_chan_list *list = &conn->chan_list; - struct l2cap_conn_req *req = (struct l2cap_conn_req *) data; - struct l2cap_conn_rsp rsp; - struct sock *parent, *sk = NULL; - int result, status = L2CAP_CS_NO_INFO; - - u16 dcid = 0, scid = __le16_to_cpu(req->scid); - __le16 psm = req->psm; - - BT_DBG("psm 0x%2.2x scid 0x%4.4x", psm, scid); - - /* Check if we have socket listening 
on psm */ - parent = l2cap_get_sock_by_psm(BT_LISTEN, psm, conn->src); - if (!parent) { - result = L2CAP_CR_BAD_PSM; - goto sendresp; - } - - bh_lock_sock(parent); - - /* Check if the ACL is secure enough (if not SDP) */ - if (psm != cpu_to_le16(0x0001) && - !hci_conn_check_link_mode(conn->hcon)) { - conn->disc_reason = 0x05; - result = L2CAP_CR_SEC_BLOCK; - goto response; - } - - result = L2CAP_CR_NO_MEM; - - /* Check for backlog size */ - if (sk_acceptq_is_full(parent)) { - BT_DBG("backlog full %d", parent->sk_ack_backlog); - goto response; - } - - sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP, GFP_ATOMIC); - if (!sk) - goto response; - - write_lock_bh(&list->lock); - - /* Check if we already have channel with that dcid */ - if (__l2cap_get_chan_by_dcid(list, scid)) { - write_unlock_bh(&list->lock); - sock_set_flag(sk, SOCK_ZAPPED); - l2cap_sock_kill(sk); - goto response; - } - - hci_conn_hold(conn->hcon); - - l2cap_sock_init(sk, parent); - bacpy(&bt_sk(sk)->src, conn->src); - bacpy(&bt_sk(sk)->dst, conn->dst); - l2cap_pi(sk)->psm = psm; - l2cap_pi(sk)->dcid = scid; - - __l2cap_chan_add(conn, sk, parent); - dcid = l2cap_pi(sk)->scid; - - l2cap_sock_set_timer(sk, sk->sk_sndtimeo); - - l2cap_pi(sk)->ident = cmd->ident; - - if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE) { - if (l2cap_check_security(sk)) { - if (bt_sk(sk)->defer_setup) { - sk->sk_state = BT_CONNECT2; - result = L2CAP_CR_PEND; - status = L2CAP_CS_AUTHOR_PEND; - parent->sk_data_ready(parent, 0); - } else { - sk->sk_state = BT_CONFIG; - result = L2CAP_CR_SUCCESS; - status = L2CAP_CS_NO_INFO; - } - } else { - sk->sk_state = BT_CONNECT2; - result = L2CAP_CR_PEND; - status = L2CAP_CS_AUTHEN_PEND; - } - } else { - sk->sk_state = BT_CONNECT2; - result = L2CAP_CR_PEND; - status = L2CAP_CS_NO_INFO; - } - - write_unlock_bh(&list->lock); - -response: - bh_unlock_sock(parent); - -sendresp: - rsp.scid = cpu_to_le16(scid); - rsp.dcid = cpu_to_le16(dcid); - rsp.result = cpu_to_le16(result); - 
rsp.status = cpu_to_le16(status); - l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp); - - if (result == L2CAP_CR_PEND && status == L2CAP_CS_NO_INFO) { - struct l2cap_info_req info; - info.type = cpu_to_le16(L2CAP_IT_FEAT_MASK); - - conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT; - conn->info_ident = l2cap_get_ident(conn); - - mod_timer(&conn->info_timer, jiffies + - msecs_to_jiffies(L2CAP_INFO_TIMEOUT)); - - l2cap_send_cmd(conn, conn->info_ident, - L2CAP_INFO_REQ, sizeof(info), &info); - } - - if (sk && !(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) && - result == L2CAP_CR_SUCCESS) { - u8 buf[128]; - l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; - l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, - l2cap_build_conf_req(sk, buf), buf); - l2cap_pi(sk)->num_conf_req++; - } - - return 0; -} - -static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data) -{ - struct l2cap_conn_rsp *rsp = (struct l2cap_conn_rsp *) data; - u16 scid, dcid, result, status; - struct sock *sk; - u8 req[128]; - - scid = __le16_to_cpu(rsp->scid); - dcid = __le16_to_cpu(rsp->dcid); - result = __le16_to_cpu(rsp->result); - status = __le16_to_cpu(rsp->status); - - BT_DBG("dcid 0x%4.4x scid 0x%4.4x result 0x%2.2x status 0x%2.2x", dcid, scid, result, status); - - if (scid) { - sk = l2cap_get_chan_by_scid(&conn->chan_list, scid); - if (!sk) - return -EFAULT; - } else { - sk = l2cap_get_chan_by_ident(&conn->chan_list, cmd->ident); - if (!sk) - return -EFAULT; - } - - switch (result) { - case L2CAP_CR_SUCCESS: - sk->sk_state = BT_CONFIG; - l2cap_pi(sk)->ident = 0; - l2cap_pi(sk)->dcid = dcid; - l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_CONNECT_PEND; - - if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) - break; - - l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; - - l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, - l2cap_build_conf_req(sk, req), req); - l2cap_pi(sk)->num_conf_req++; - break; - - case L2CAP_CR_PEND: 
- l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND; - break; - - default: - /* don't delete l2cap channel if sk is owned by user */ - if (sock_owned_by_user(sk)) { - sk->sk_state = BT_DISCONN; - l2cap_sock_clear_timer(sk); - l2cap_sock_set_timer(sk, HZ / 5); - break; - } - - l2cap_chan_del(sk, ECONNREFUSED); - break; - } - - bh_unlock_sock(sk); - return 0; -} - -static inline void set_default_fcs(struct l2cap_pinfo *pi) -{ - /* FCS is enabled only in ERTM or streaming mode, if one or both - * sides request it. - */ - if (pi->mode != L2CAP_MODE_ERTM && pi->mode != L2CAP_MODE_STREAMING) - pi->fcs = L2CAP_FCS_NONE; - else if (!(pi->conf_state & L2CAP_CONF_NO_FCS_RECV)) - pi->fcs = L2CAP_FCS_CRC16; -} - -static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) -{ - struct l2cap_conf_req *req = (struct l2cap_conf_req *) data; - u16 dcid, flags; - u8 rsp[64]; - struct sock *sk; - int len; - - dcid = __le16_to_cpu(req->dcid); - flags = __le16_to_cpu(req->flags); - - BT_DBG("dcid 0x%4.4x flags 0x%2.2x", dcid, flags); - - sk = l2cap_get_chan_by_scid(&conn->chan_list, dcid); - if (!sk) - return -ENOENT; - - if (sk->sk_state != BT_CONFIG) { - struct l2cap_cmd_rej rej; - - rej.reason = cpu_to_le16(0x0002); - l2cap_send_cmd(conn, cmd->ident, L2CAP_COMMAND_REJ, - sizeof(rej), &rej); - goto unlock; - } - - /* Reject if config buffer is too small. */ - len = cmd_len - sizeof(*req); - if (l2cap_pi(sk)->conf_len + len > sizeof(l2cap_pi(sk)->conf_req)) { - l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, - l2cap_build_conf_rsp(sk, rsp, - L2CAP_CONF_REJECT, flags), rsp); - goto unlock; - } - - /* Store config. */ - memcpy(l2cap_pi(sk)->conf_req + l2cap_pi(sk)->conf_len, req->data, len); - l2cap_pi(sk)->conf_len += len; - - if (flags & 0x0001) { - /* Incomplete config. Send empty response. 
*/ - l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, - l2cap_build_conf_rsp(sk, rsp, - L2CAP_CONF_SUCCESS, 0x0001), rsp); - goto unlock; - } - - /* Complete config. */ - len = l2cap_parse_conf_req(sk, rsp); - if (len < 0) { - l2cap_send_disconn_req(conn, sk, ECONNRESET); - goto unlock; - } - - l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, len, rsp); - l2cap_pi(sk)->num_conf_rsp++; - - /* Reset config buffer. */ - l2cap_pi(sk)->conf_len = 0; - - if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_OUTPUT_DONE)) - goto unlock; - - if (l2cap_pi(sk)->conf_state & L2CAP_CONF_INPUT_DONE) { - set_default_fcs(l2cap_pi(sk)); - - sk->sk_state = BT_CONNECTED; - - l2cap_pi(sk)->next_tx_seq = 0; - l2cap_pi(sk)->expected_tx_seq = 0; - __skb_queue_head_init(TX_QUEUE(sk)); - if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) - l2cap_ertm_init(sk); - - l2cap_chan_ready(sk); - goto unlock; - } - - if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT)) { - u8 buf[64]; - l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; - l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, - l2cap_build_conf_req(sk, buf), buf); - l2cap_pi(sk)->num_conf_req++; - } - -unlock: - bh_unlock_sock(sk); - return 0; -} - -static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data) -{ - struct l2cap_conf_rsp *rsp = (struct l2cap_conf_rsp *)data; - u16 scid, flags, result; - struct sock *sk; - int len = cmd->len - sizeof(*rsp); - - scid = __le16_to_cpu(rsp->scid); - flags = __le16_to_cpu(rsp->flags); - result = __le16_to_cpu(rsp->result); - - BT_DBG("scid 0x%4.4x flags 0x%2.2x result 0x%2.2x", - scid, flags, result); - - sk = l2cap_get_chan_by_scid(&conn->chan_list, scid); - if (!sk) - return 0; - - switch (result) { - case L2CAP_CONF_SUCCESS: - l2cap_conf_rfc_get(sk, rsp->data, len); - break; - - case L2CAP_CONF_UNACCEPT: - if (l2cap_pi(sk)->num_conf_rsp <= L2CAP_CONF_MAX_CONF_RSP) { - char req[64]; - - if (len > sizeof(req) - sizeof(struct l2cap_conf_req)) { - 
l2cap_send_disconn_req(conn, sk, ECONNRESET); - goto done; - } - - /* throw out any old stored conf requests */ - result = L2CAP_CONF_SUCCESS; - len = l2cap_parse_conf_rsp(sk, rsp->data, - len, req, &result); - if (len < 0) { - l2cap_send_disconn_req(conn, sk, ECONNRESET); - goto done; - } - - l2cap_send_cmd(conn, l2cap_get_ident(conn), - L2CAP_CONF_REQ, len, req); - l2cap_pi(sk)->num_conf_req++; - if (result != L2CAP_CONF_SUCCESS) - goto done; - break; - } - - default: - sk->sk_err = ECONNRESET; - l2cap_sock_set_timer(sk, HZ * 5); - l2cap_send_disconn_req(conn, sk, ECONNRESET); - goto done; - } - - if (flags & 0x01) - goto done; - - l2cap_pi(sk)->conf_state |= L2CAP_CONF_INPUT_DONE; - - if (l2cap_pi(sk)->conf_state & L2CAP_CONF_OUTPUT_DONE) { - set_default_fcs(l2cap_pi(sk)); - - sk->sk_state = BT_CONNECTED; - l2cap_pi(sk)->next_tx_seq = 0; - l2cap_pi(sk)->expected_tx_seq = 0; - __skb_queue_head_init(TX_QUEUE(sk)); - if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) - l2cap_ertm_init(sk); - - l2cap_chan_ready(sk); - } - -done: - bh_unlock_sock(sk); - return 0; -} - -static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data) -{ - struct l2cap_disconn_req *req = (struct l2cap_disconn_req *) data; - struct l2cap_disconn_rsp rsp; - u16 dcid, scid; - struct sock *sk; - - scid = __le16_to_cpu(req->scid); - dcid = __le16_to_cpu(req->dcid); - - BT_DBG("scid 0x%4.4x dcid 0x%4.4x", scid, dcid); - - sk = l2cap_get_chan_by_scid(&conn->chan_list, dcid); - if (!sk) - return 0; - - rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); - rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); - l2cap_send_cmd(conn, cmd->ident, L2CAP_DISCONN_RSP, sizeof(rsp), &rsp); - - sk->sk_shutdown = SHUTDOWN_MASK; - - /* don't delete l2cap channel if sk is owned by user */ - if (sock_owned_by_user(sk)) { - sk->sk_state = BT_DISCONN; - l2cap_sock_clear_timer(sk); - l2cap_sock_set_timer(sk, HZ / 5); - bh_unlock_sock(sk); - return 0; - } - - l2cap_chan_del(sk, ECONNRESET); - 
bh_unlock_sock(sk); - - l2cap_sock_kill(sk); - return 0; -} - -static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data) -{ - struct l2cap_disconn_rsp *rsp = (struct l2cap_disconn_rsp *) data; - u16 dcid, scid; - struct sock *sk; - - scid = __le16_to_cpu(rsp->scid); - dcid = __le16_to_cpu(rsp->dcid); - - BT_DBG("dcid 0x%4.4x scid 0x%4.4x", dcid, scid); - - sk = l2cap_get_chan_by_scid(&conn->chan_list, scid); - if (!sk) - return 0; - - /* don't delete l2cap channel if sk is owned by user */ - if (sock_owned_by_user(sk)) { - sk->sk_state = BT_DISCONN; - l2cap_sock_clear_timer(sk); - l2cap_sock_set_timer(sk, HZ / 5); - bh_unlock_sock(sk); - return 0; - } - - l2cap_chan_del(sk, 0); - bh_unlock_sock(sk); - - l2cap_sock_kill(sk); - return 0; -} - -static inline int l2cap_information_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data) -{ - struct l2cap_info_req *req = (struct l2cap_info_req *) data; - u16 type; - - type = __le16_to_cpu(req->type); - - BT_DBG("type 0x%4.4x", type); - - if (type == L2CAP_IT_FEAT_MASK) { - u8 buf[8]; - u32 feat_mask = l2cap_feat_mask; - struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf; - rsp->type = cpu_to_le16(L2CAP_IT_FEAT_MASK); - rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS); - if (!disable_ertm) - feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING - | L2CAP_FEAT_FCS; - put_unaligned_le32(feat_mask, rsp->data); - l2cap_send_cmd(conn, cmd->ident, - L2CAP_INFO_RSP, sizeof(buf), buf); - } else if (type == L2CAP_IT_FIXED_CHAN) { - u8 buf[12]; - struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf; - rsp->type = cpu_to_le16(L2CAP_IT_FIXED_CHAN); - rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS); - memcpy(buf + 4, l2cap_fixed_chan, 8); - l2cap_send_cmd(conn, cmd->ident, - L2CAP_INFO_RSP, sizeof(buf), buf); - } else { - struct l2cap_info_rsp rsp; - rsp.type = cpu_to_le16(type); - rsp.result = cpu_to_le16(L2CAP_IR_NOTSUPP); - l2cap_send_cmd(conn, cmd->ident, - L2CAP_INFO_RSP, 
sizeof(rsp), &rsp); - } - - return 0; -} - -static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data) -{ - struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) data; - u16 type, result; - - type = __le16_to_cpu(rsp->type); - result = __le16_to_cpu(rsp->result); - - BT_DBG("type 0x%4.4x result 0x%2.2x", type, result); - - del_timer(&conn->info_timer); - - if (result != L2CAP_IR_SUCCESS) { - conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; - conn->info_ident = 0; - - l2cap_conn_start(conn); - - return 0; - } - - if (type == L2CAP_IT_FEAT_MASK) { - conn->feat_mask = get_unaligned_le32(rsp->data); - - if (conn->feat_mask & L2CAP_FEAT_FIXED_CHAN) { - struct l2cap_info_req req; - req.type = cpu_to_le16(L2CAP_IT_FIXED_CHAN); - - conn->info_ident = l2cap_get_ident(conn); - - l2cap_send_cmd(conn, conn->info_ident, - L2CAP_INFO_REQ, sizeof(req), &req); - } else { - conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; - conn->info_ident = 0; - - l2cap_conn_start(conn); - } - } else if (type == L2CAP_IT_FIXED_CHAN) { - conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; - conn->info_ident = 0; - - l2cap_conn_start(conn); - } - - return 0; -} - -static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) -{ - u8 *data = skb->data; - int len = skb->len; - struct l2cap_cmd_hdr cmd; - int err = 0; - - l2cap_raw_recv(conn, skb); - - while (len >= L2CAP_CMD_HDR_SIZE) { - u16 cmd_len; - memcpy(&cmd, data, L2CAP_CMD_HDR_SIZE); - data += L2CAP_CMD_HDR_SIZE; - len -= L2CAP_CMD_HDR_SIZE; - - cmd_len = le16_to_cpu(cmd.len); - - BT_DBG("code 0x%2.2x len %d id 0x%2.2x", cmd.code, cmd_len, cmd.ident); - - if (cmd_len > len || !cmd.ident) { - BT_DBG("corrupted command"); - break; - } - - switch (cmd.code) { - case L2CAP_COMMAND_REJ: - l2cap_command_rej(conn, &cmd, data); - break; - - case L2CAP_CONN_REQ: - err = l2cap_connect_req(conn, &cmd, data); - break; - - case L2CAP_CONN_RSP: - err = l2cap_connect_rsp(conn, &cmd, data); 
- break; - - case L2CAP_CONF_REQ: - err = l2cap_config_req(conn, &cmd, cmd_len, data); - break; - - case L2CAP_CONF_RSP: - err = l2cap_config_rsp(conn, &cmd, data); - break; - - case L2CAP_DISCONN_REQ: - err = l2cap_disconnect_req(conn, &cmd, data); - break; - - case L2CAP_DISCONN_RSP: - err = l2cap_disconnect_rsp(conn, &cmd, data); - break; - - case L2CAP_ECHO_REQ: - l2cap_send_cmd(conn, cmd.ident, L2CAP_ECHO_RSP, cmd_len, data); - break; - - case L2CAP_ECHO_RSP: - break; - - case L2CAP_INFO_REQ: - err = l2cap_information_req(conn, &cmd, data); - break; - - case L2CAP_INFO_RSP: - err = l2cap_information_rsp(conn, &cmd, data); - break; - - default: - BT_ERR("Unknown signaling command 0x%2.2x", cmd.code); - err = -EINVAL; - break; - } - - if (err) { - struct l2cap_cmd_rej rej; - BT_DBG("error %d", err); - - /* FIXME: Map err to a valid reason */ - rej.reason = cpu_to_le16(0); - l2cap_send_cmd(conn, cmd.ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej); - } - - data += cmd_len; - len -= cmd_len; - } - - kfree_skb(skb); -} - -static int l2cap_check_fcs(struct l2cap_pinfo *pi, struct sk_buff *skb) -{ - u16 our_fcs, rcv_fcs; - int hdr_size = L2CAP_HDR_SIZE + 2; - - if (pi->fcs == L2CAP_FCS_CRC16) { - skb_trim(skb, skb->len - 2); - rcv_fcs = get_unaligned_le16(skb->data + skb->len); - our_fcs = crc16(0, skb->data - hdr_size, skb->len + hdr_size); - - if (our_fcs != rcv_fcs) - return -EBADMSG; - } - return 0; -} - -static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk) -{ - struct l2cap_pinfo *pi = l2cap_pi(sk); - u16 control = 0; - - pi->frames_sent = 0; - - control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; - - if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) { - control |= L2CAP_SUPER_RCV_NOT_READY; - l2cap_send_sframe(pi, control); - pi->conn_state |= L2CAP_CONN_RNR_SENT; - } - - if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY) - l2cap_retransmit_frames(sk); - - l2cap_ertm_send(sk); - - if (!(pi->conn_state & L2CAP_CONN_LOCAL_BUSY) && - pi->frames_sent == 0) { - control 
|= L2CAP_SUPER_RCV_READY; - l2cap_send_sframe(pi, control); - } -} - -static int l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_seq, u8 sar) -{ - struct sk_buff *next_skb; - struct l2cap_pinfo *pi = l2cap_pi(sk); - int tx_seq_offset, next_tx_seq_offset; - - bt_cb(skb)->tx_seq = tx_seq; - bt_cb(skb)->sar = sar; - - next_skb = skb_peek(SREJ_QUEUE(sk)); - if (!next_skb) { - __skb_queue_tail(SREJ_QUEUE(sk), skb); - return 0; - } - - tx_seq_offset = (tx_seq - pi->buffer_seq) % 64; - if (tx_seq_offset < 0) - tx_seq_offset += 64; - - do { - if (bt_cb(next_skb)->tx_seq == tx_seq) - return -EINVAL; - - next_tx_seq_offset = (bt_cb(next_skb)->tx_seq - - pi->buffer_seq) % 64; - if (next_tx_seq_offset < 0) - next_tx_seq_offset += 64; - - if (next_tx_seq_offset > tx_seq_offset) { - __skb_queue_before(SREJ_QUEUE(sk), next_skb, skb); - return 0; - } - - if (skb_queue_is_last(SREJ_QUEUE(sk), next_skb)) - break; - - } while ((next_skb = skb_queue_next(SREJ_QUEUE(sk), next_skb))); - - __skb_queue_tail(SREJ_QUEUE(sk), skb); - - return 0; -} - -static int l2cap_ertm_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 control) -{ - struct l2cap_pinfo *pi = l2cap_pi(sk); - struct sk_buff *_skb; - int err; - - switch (control & L2CAP_CTRL_SAR) { - case L2CAP_SDU_UNSEGMENTED: - if (pi->conn_state & L2CAP_CONN_SAR_SDU) - goto drop; - - err = sock_queue_rcv_skb(sk, skb); - if (!err) - return err; - - break; - - case L2CAP_SDU_START: - if (pi->conn_state & L2CAP_CONN_SAR_SDU) - goto drop; - - pi->sdu_len = get_unaligned_le16(skb->data); - - if (pi->sdu_len > pi->imtu) - goto disconnect; - - pi->sdu = bt_skb_alloc(pi->sdu_len, GFP_ATOMIC); - if (!pi->sdu) - return -ENOMEM; - - /* pull sdu_len bytes only after alloc, because of Local Busy - * condition we have to be sure that this will be executed - * only once, i.e., when alloc does not fail */ - skb_pull(skb, 2); - - memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len); - - pi->conn_state |= L2CAP_CONN_SAR_SDU; - 
pi->partial_sdu_len = skb->len; - break; - - case L2CAP_SDU_CONTINUE: - if (!(pi->conn_state & L2CAP_CONN_SAR_SDU)) - goto disconnect; - - if (!pi->sdu) - goto disconnect; - - pi->partial_sdu_len += skb->len; - if (pi->partial_sdu_len > pi->sdu_len) - goto drop; - - memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len); - - break; - - case L2CAP_SDU_END: - if (!(pi->conn_state & L2CAP_CONN_SAR_SDU)) - goto disconnect; - - if (!pi->sdu) - goto disconnect; - - if (!(pi->conn_state & L2CAP_CONN_SAR_RETRY)) { - pi->partial_sdu_len += skb->len; - - if (pi->partial_sdu_len > pi->imtu) - goto drop; - - if (pi->partial_sdu_len != pi->sdu_len) - goto drop; - - memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len); - } - - _skb = skb_clone(pi->sdu, GFP_ATOMIC); - if (!_skb) { - pi->conn_state |= L2CAP_CONN_SAR_RETRY; - return -ENOMEM; - } - - err = sock_queue_rcv_skb(sk, _skb); - if (err < 0) { - kfree_skb(_skb); - pi->conn_state |= L2CAP_CONN_SAR_RETRY; - return err; - } - - pi->conn_state &= ~L2CAP_CONN_SAR_RETRY; - pi->conn_state &= ~L2CAP_CONN_SAR_SDU; - - kfree_skb(pi->sdu); - break; - } - - kfree_skb(skb); - return 0; - -drop: - kfree_skb(pi->sdu); - pi->sdu = NULL; - -disconnect: - l2cap_send_disconn_req(pi->conn, sk, ECONNRESET); - kfree_skb(skb); - return 0; -} - -static int l2cap_try_push_rx_skb(struct sock *sk) -{ - struct l2cap_pinfo *pi = l2cap_pi(sk); - struct sk_buff *skb; - u16 control; - int err; - - while ((skb = skb_dequeue(BUSY_QUEUE(sk)))) { - control = bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT; - err = l2cap_ertm_reassembly_sdu(sk, skb, control); - if (err < 0) { - skb_queue_head(BUSY_QUEUE(sk), skb); - return -EBUSY; - } - - pi->buffer_seq = (pi->buffer_seq + 1) % 64; - } - - if (!(pi->conn_state & L2CAP_CONN_RNR_SENT)) - goto done; - - control = pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; - control |= L2CAP_SUPER_RCV_READY | L2CAP_CTRL_POLL; - l2cap_send_sframe(pi, control); - l2cap_pi(sk)->retry_count = 1; - - del_timer(&pi->retrans_timer); - 
__mod_monitor_timer(); - - l2cap_pi(sk)->conn_state |= L2CAP_CONN_WAIT_F; - -done: - pi->conn_state &= ~L2CAP_CONN_LOCAL_BUSY; - pi->conn_state &= ~L2CAP_CONN_RNR_SENT; - - BT_DBG("sk %p, Exit local busy", sk); - - return 0; -} - -static void l2cap_busy_work(struct work_struct *work) -{ - DECLARE_WAITQUEUE(wait, current); - struct l2cap_pinfo *pi = - container_of(work, struct l2cap_pinfo, busy_work); - struct sock *sk = (struct sock *)pi; - int n_tries = 0, timeo = HZ/5, err; - struct sk_buff *skb; - - lock_sock(sk); - - add_wait_queue(sk_sleep(sk), &wait); - while ((skb = skb_peek(BUSY_QUEUE(sk)))) { - set_current_state(TASK_INTERRUPTIBLE); - - if (n_tries++ > L2CAP_LOCAL_BUSY_TRIES) { - err = -EBUSY; - l2cap_send_disconn_req(pi->conn, sk, EBUSY); - break; - } - - if (!timeo) - timeo = HZ/5; - - if (signal_pending(current)) { - err = sock_intr_errno(timeo); - break; - } - - release_sock(sk); - timeo = schedule_timeout(timeo); - lock_sock(sk); - - err = sock_error(sk); - if (err) - break; - - if (l2cap_try_push_rx_skb(sk) == 0) - break; - } - - set_current_state(TASK_RUNNING); - remove_wait_queue(sk_sleep(sk), &wait); - - release_sock(sk); -} - -static int l2cap_push_rx_skb(struct sock *sk, struct sk_buff *skb, u16 control) -{ - struct l2cap_pinfo *pi = l2cap_pi(sk); - int sctrl, err; - - if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) { - bt_cb(skb)->sar = control >> L2CAP_CTRL_SAR_SHIFT; - __skb_queue_tail(BUSY_QUEUE(sk), skb); - return l2cap_try_push_rx_skb(sk); - - - } - - err = l2cap_ertm_reassembly_sdu(sk, skb, control); - if (err >= 0) { - pi->buffer_seq = (pi->buffer_seq + 1) % 64; - return err; - } - - /* Busy Condition */ - BT_DBG("sk %p, Enter local busy", sk); - - pi->conn_state |= L2CAP_CONN_LOCAL_BUSY; - bt_cb(skb)->sar = control >> L2CAP_CTRL_SAR_SHIFT; - __skb_queue_tail(BUSY_QUEUE(sk), skb); - - sctrl = pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; - sctrl |= L2CAP_SUPER_RCV_NOT_READY; - l2cap_send_sframe(pi, sctrl); - - pi->conn_state |= 
L2CAP_CONN_RNR_SENT; - - del_timer(&pi->ack_timer); - - queue_work(_busy_wq, &pi->busy_work); - - return err; -} - -static int l2cap_streaming_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 control) -{ - struct l2cap_pinfo *pi = l2cap_pi(sk); - struct sk_buff *_skb; - int err = -EINVAL; - - /* - * TODO: We have to notify the userland if some data is lost with the - * Streaming Mode. - */ - - switch (control & L2CAP_CTRL_SAR) { - case L2CAP_SDU_UNSEGMENTED: - if (pi->conn_state & L2CAP_CONN_SAR_SDU) { - kfree_skb(pi->sdu); - break; - } - - err = sock_queue_rcv_skb(sk, skb); - if (!err) - return 0; - - break; - - case L2CAP_SDU_START: - if (pi->conn_state & L2CAP_CONN_SAR_SDU) { - kfree_skb(pi->sdu); - break; - } - - pi->sdu_len = get_unaligned_le16(skb->data); - skb_pull(skb, 2); - - if (pi->sdu_len > pi->imtu) { - err = -EMSGSIZE; - break; - } - - pi->sdu = bt_skb_alloc(pi->sdu_len, GFP_ATOMIC); - if (!pi->sdu) { - err = -ENOMEM; - break; - } - - memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len); - - pi->conn_state |= L2CAP_CONN_SAR_SDU; - pi->partial_sdu_len = skb->len; - err = 0; - break; - - case L2CAP_SDU_CONTINUE: - if (!(pi->conn_state & L2CAP_CONN_SAR_SDU)) - break; - - memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len); - - pi->partial_sdu_len += skb->len; - if (pi->partial_sdu_len > pi->sdu_len) - kfree_skb(pi->sdu); - else - err = 0; - - break; - - case L2CAP_SDU_END: - if (!(pi->conn_state & L2CAP_CONN_SAR_SDU)) - break; - - memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len); - - pi->conn_state &= ~L2CAP_CONN_SAR_SDU; - pi->partial_sdu_len += skb->len; - - if (pi->partial_sdu_len > pi->imtu) - goto drop; - - if (pi->partial_sdu_len == pi->sdu_len) { - _skb = skb_clone(pi->sdu, GFP_ATOMIC); - err = sock_queue_rcv_skb(sk, _skb); - if (err < 0) - kfree_skb(_skb); - } - err = 0; - -drop: - kfree_skb(pi->sdu); - break; - } - - kfree_skb(skb); - return err; -} - -static void l2cap_check_srej_gap(struct sock *sk, u8 tx_seq) -{ - struct 
sk_buff *skb; - u16 control; - - while ((skb = skb_peek(SREJ_QUEUE(sk)))) { - if (bt_cb(skb)->tx_seq != tx_seq) - break; - - skb = skb_dequeue(SREJ_QUEUE(sk)); - control = bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT; - l2cap_ertm_reassembly_sdu(sk, skb, control); - l2cap_pi(sk)->buffer_seq_srej = - (l2cap_pi(sk)->buffer_seq_srej + 1) % 64; - tx_seq = (tx_seq + 1) % 64; - } -} - -static void l2cap_resend_srejframe(struct sock *sk, u8 tx_seq) -{ - struct l2cap_pinfo *pi = l2cap_pi(sk); - struct srej_list *l, *tmp; - u16 control; - - list_for_each_entry_safe(l, tmp, SREJ_LIST(sk), list) { - if (l->tx_seq == tx_seq) { - list_del(&l->list); - kfree(l); - return; - } - control = L2CAP_SUPER_SELECT_REJECT; - control |= l->tx_seq << L2CAP_CTRL_REQSEQ_SHIFT; - l2cap_send_sframe(pi, control); - list_del(&l->list); - list_add_tail(&l->list, SREJ_LIST(sk)); - } -} - -static void l2cap_send_srejframe(struct sock *sk, u8 tx_seq) -{ - struct l2cap_pinfo *pi = l2cap_pi(sk); - struct srej_list *new; - u16 control; - - while (tx_seq != pi->expected_tx_seq) { - control = L2CAP_SUPER_SELECT_REJECT; - control |= pi->expected_tx_seq << L2CAP_CTRL_REQSEQ_SHIFT; - l2cap_send_sframe(pi, control); - - new = kzalloc(sizeof(struct srej_list), GFP_ATOMIC); - new->tx_seq = pi->expected_tx_seq; - pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64; - list_add_tail(&new->list, SREJ_LIST(sk)); - } - pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64; -} - -static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, struct sk_buff *skb) -{ - struct l2cap_pinfo *pi = l2cap_pi(sk); - u8 tx_seq = __get_txseq(rx_control); - u8 req_seq = __get_reqseq(rx_control); - u8 sar = rx_control >> L2CAP_CTRL_SAR_SHIFT; - int tx_seq_offset, expected_tx_seq_offset; - int num_to_ack = (pi->tx_win/6) + 1; - int err = 0; - - BT_DBG("sk %p len %d tx_seq %d rx_control 0x%4.4x", sk, skb->len, tx_seq, - rx_control); - - if (L2CAP_CTRL_FINAL & rx_control && - l2cap_pi(sk)->conn_state & L2CAP_CONN_WAIT_F) { 
- del_timer(&pi->monitor_timer); - if (pi->unacked_frames > 0) - __mod_retrans_timer(); - pi->conn_state &= ~L2CAP_CONN_WAIT_F; - } - - pi->expected_ack_seq = req_seq; - l2cap_drop_acked_frames(sk); - - if (tx_seq == pi->expected_tx_seq) - goto expected; - - tx_seq_offset = (tx_seq - pi->buffer_seq) % 64; - if (tx_seq_offset < 0) - tx_seq_offset += 64; - - /* invalid tx_seq */ - if (tx_seq_offset >= pi->tx_win) { - l2cap_send_disconn_req(pi->conn, sk, ECONNRESET); - goto drop; - } - - if (pi->conn_state == L2CAP_CONN_LOCAL_BUSY) - goto drop; - - if (pi->conn_state & L2CAP_CONN_SREJ_SENT) { - struct srej_list *first; - - first = list_first_entry(SREJ_LIST(sk), - struct srej_list, list); - if (tx_seq == first->tx_seq) { - l2cap_add_to_srej_queue(sk, skb, tx_seq, sar); - l2cap_check_srej_gap(sk, tx_seq); - - list_del(&first->list); - kfree(first); - - if (list_empty(SREJ_LIST(sk))) { - pi->buffer_seq = pi->buffer_seq_srej; - pi->conn_state &= ~L2CAP_CONN_SREJ_SENT; - l2cap_send_ack(pi); - BT_DBG("sk %p, Exit SREJ_SENT", sk); - } - } else { - struct srej_list *l; - - /* duplicated tx_seq */ - if (l2cap_add_to_srej_queue(sk, skb, tx_seq, sar) < 0) - goto drop; - - list_for_each_entry(l, SREJ_LIST(sk), list) { - if (l->tx_seq == tx_seq) { - l2cap_resend_srejframe(sk, tx_seq); - return 0; - } - } - l2cap_send_srejframe(sk, tx_seq); - } - } else { - expected_tx_seq_offset = - (pi->expected_tx_seq - pi->buffer_seq) % 64; - if (expected_tx_seq_offset < 0) - expected_tx_seq_offset += 64; - - /* duplicated tx_seq */ - if (tx_seq_offset < expected_tx_seq_offset) - goto drop; - - pi->conn_state |= L2CAP_CONN_SREJ_SENT; - - BT_DBG("sk %p, Enter SREJ", sk); - - INIT_LIST_HEAD(SREJ_LIST(sk)); - pi->buffer_seq_srej = pi->buffer_seq; - - __skb_queue_head_init(SREJ_QUEUE(sk)); - __skb_queue_head_init(BUSY_QUEUE(sk)); - l2cap_add_to_srej_queue(sk, skb, tx_seq, sar); - - pi->conn_state |= L2CAP_CONN_SEND_PBIT; - - l2cap_send_srejframe(sk, tx_seq); - - del_timer(&pi->ack_timer); - } - 
return 0; - -expected: - pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64; - - if (pi->conn_state & L2CAP_CONN_SREJ_SENT) { - bt_cb(skb)->tx_seq = tx_seq; - bt_cb(skb)->sar = sar; - __skb_queue_tail(SREJ_QUEUE(sk), skb); - return 0; - } - - err = l2cap_push_rx_skb(sk, skb, rx_control); - if (err < 0) - return 0; - - if (rx_control & L2CAP_CTRL_FINAL) { - if (pi->conn_state & L2CAP_CONN_REJ_ACT) - pi->conn_state &= ~L2CAP_CONN_REJ_ACT; - else - l2cap_retransmit_frames(sk); - } - - __mod_ack_timer(); - - pi->num_acked = (pi->num_acked + 1) % num_to_ack; - if (pi->num_acked == num_to_ack - 1) - l2cap_send_ack(pi); - - return 0; - -drop: - kfree_skb(skb); - return 0; -} - -static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control) -{ - struct l2cap_pinfo *pi = l2cap_pi(sk); - - BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, __get_reqseq(rx_control), - rx_control); - - pi->expected_ack_seq = __get_reqseq(rx_control); - l2cap_drop_acked_frames(sk); - - if (rx_control & L2CAP_CTRL_POLL) { - pi->conn_state |= L2CAP_CONN_SEND_FBIT; - if (pi->conn_state & L2CAP_CONN_SREJ_SENT) { - if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) && - (pi->unacked_frames > 0)) - __mod_retrans_timer(); - - pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; - l2cap_send_srejtail(sk); - } else { - l2cap_send_i_or_rr_or_rnr(sk); - } - - } else if (rx_control & L2CAP_CTRL_FINAL) { - pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; - - if (pi->conn_state & L2CAP_CONN_REJ_ACT) - pi->conn_state &= ~L2CAP_CONN_REJ_ACT; - else - l2cap_retransmit_frames(sk); - - } else { - if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) && - (pi->unacked_frames > 0)) - __mod_retrans_timer(); - - pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; - if (pi->conn_state & L2CAP_CONN_SREJ_SENT) - l2cap_send_ack(pi); - else - l2cap_ertm_send(sk); - } -} - -static inline void l2cap_data_channel_rejframe(struct sock *sk, u16 rx_control) -{ - struct l2cap_pinfo *pi = l2cap_pi(sk); - u8 tx_seq = __get_reqseq(rx_control); - - 
BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, tx_seq, rx_control); - - pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; - - pi->expected_ack_seq = tx_seq; - l2cap_drop_acked_frames(sk); - - if (rx_control & L2CAP_CTRL_FINAL) { - if (pi->conn_state & L2CAP_CONN_REJ_ACT) - pi->conn_state &= ~L2CAP_CONN_REJ_ACT; - else - l2cap_retransmit_frames(sk); - } else { - l2cap_retransmit_frames(sk); - - if (pi->conn_state & L2CAP_CONN_WAIT_F) - pi->conn_state |= L2CAP_CONN_REJ_ACT; - } -} -static inline void l2cap_data_channel_srejframe(struct sock *sk, u16 rx_control) -{ - struct l2cap_pinfo *pi = l2cap_pi(sk); - u8 tx_seq = __get_reqseq(rx_control); - - BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, tx_seq, rx_control); - - pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; - - if (rx_control & L2CAP_CTRL_POLL) { - pi->expected_ack_seq = tx_seq; - l2cap_drop_acked_frames(sk); - - pi->conn_state |= L2CAP_CONN_SEND_FBIT; - l2cap_retransmit_one_frame(sk, tx_seq); - - l2cap_ertm_send(sk); - - if (pi->conn_state & L2CAP_CONN_WAIT_F) { - pi->srej_save_reqseq = tx_seq; - pi->conn_state |= L2CAP_CONN_SREJ_ACT; - } - } else if (rx_control & L2CAP_CTRL_FINAL) { - if ((pi->conn_state & L2CAP_CONN_SREJ_ACT) && - pi->srej_save_reqseq == tx_seq) - pi->conn_state &= ~L2CAP_CONN_SREJ_ACT; - else - l2cap_retransmit_one_frame(sk, tx_seq); - } else { - l2cap_retransmit_one_frame(sk, tx_seq); - if (pi->conn_state & L2CAP_CONN_WAIT_F) { - pi->srej_save_reqseq = tx_seq; - pi->conn_state |= L2CAP_CONN_SREJ_ACT; - } - } -} - -static inline void l2cap_data_channel_rnrframe(struct sock *sk, u16 rx_control) -{ - struct l2cap_pinfo *pi = l2cap_pi(sk); - u8 tx_seq = __get_reqseq(rx_control); - - BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, tx_seq, rx_control); - - pi->conn_state |= L2CAP_CONN_REMOTE_BUSY; - pi->expected_ack_seq = tx_seq; - l2cap_drop_acked_frames(sk); - - if (rx_control & L2CAP_CTRL_POLL) - pi->conn_state |= L2CAP_CONN_SEND_FBIT; - - if (!(pi->conn_state & L2CAP_CONN_SREJ_SENT)) { - 
del_timer(&pi->retrans_timer); - if (rx_control & L2CAP_CTRL_POLL) - l2cap_send_rr_or_rnr(pi, L2CAP_CTRL_FINAL); - return; - } - - if (rx_control & L2CAP_CTRL_POLL) - l2cap_send_srejtail(sk); - else - l2cap_send_sframe(pi, L2CAP_SUPER_RCV_READY); -} - -static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, struct sk_buff *skb) -{ - BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len); - - if (L2CAP_CTRL_FINAL & rx_control && - l2cap_pi(sk)->conn_state & L2CAP_CONN_WAIT_F) { - del_timer(&l2cap_pi(sk)->monitor_timer); - if (l2cap_pi(sk)->unacked_frames > 0) - __mod_retrans_timer(); - l2cap_pi(sk)->conn_state &= ~L2CAP_CONN_WAIT_F; - } - - switch (rx_control & L2CAP_CTRL_SUPERVISE) { - case L2CAP_SUPER_RCV_READY: - l2cap_data_channel_rrframe(sk, rx_control); - break; - - case L2CAP_SUPER_REJECT: - l2cap_data_channel_rejframe(sk, rx_control); - break; - - case L2CAP_SUPER_SELECT_REJECT: - l2cap_data_channel_srejframe(sk, rx_control); - break; - - case L2CAP_SUPER_RCV_NOT_READY: - l2cap_data_channel_rnrframe(sk, rx_control); - break; - } - - kfree_skb(skb); - return 0; -} - -static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb) -{ - struct l2cap_pinfo *pi = l2cap_pi(sk); - u16 control; - u8 req_seq; - int len, next_tx_seq_offset, req_seq_offset; - - control = get_unaligned_le16(skb->data); - skb_pull(skb, 2); - len = skb->len; - - /* - * We can just drop the corrupted I-frame here. - * Receiver will miss it and start proper recovery - * procedures and ask retransmission. 
- */ - if (l2cap_check_fcs(pi, skb)) - goto drop; - - if (__is_sar_start(control) && __is_iframe(control)) - len -= 2; - - if (pi->fcs == L2CAP_FCS_CRC16) - len -= 2; - - if (len > pi->mps) { - l2cap_send_disconn_req(pi->conn, sk, ECONNRESET); - goto drop; - } - - req_seq = __get_reqseq(control); - req_seq_offset = (req_seq - pi->expected_ack_seq) % 64; - if (req_seq_offset < 0) - req_seq_offset += 64; - - next_tx_seq_offset = - (pi->next_tx_seq - pi->expected_ack_seq) % 64; - if (next_tx_seq_offset < 0) - next_tx_seq_offset += 64; - - /* check for invalid req-seq */ - if (req_seq_offset > next_tx_seq_offset) { - l2cap_send_disconn_req(pi->conn, sk, ECONNRESET); - goto drop; - } - - if (__is_iframe(control)) { - if (len < 0) { - l2cap_send_disconn_req(pi->conn, sk, ECONNRESET); - goto drop; - } - - l2cap_data_channel_iframe(sk, control, skb); - } else { - if (len != 0) { - BT_ERR("%d", len); - l2cap_send_disconn_req(pi->conn, sk, ECONNRESET); - goto drop; - } - - l2cap_data_channel_sframe(sk, control, skb); - } - - return 0; - -drop: - kfree_skb(skb); - return 0; -} - -static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk_buff *skb) -{ - struct sock *sk; - struct l2cap_pinfo *pi; - u16 control; - u8 tx_seq; - int len; - - sk = l2cap_get_chan_by_scid(&conn->chan_list, cid); - if (!sk) { - BT_DBG("unknown cid 0x%4.4x", cid); - goto drop; - } - - pi = l2cap_pi(sk); - - BT_DBG("sk %p, len %d", sk, skb->len); - - if (sk->sk_state != BT_CONNECTED) - goto drop; - - switch (pi->mode) { - case L2CAP_MODE_BASIC: - /* If socket recv buffers overflows we drop data here - * which is *bad* because L2CAP has to be reliable. - * But we don't have any other choice. L2CAP doesn't - * provide flow control mechanism. 
*/ - - if (pi->imtu < skb->len) - goto drop; - - if (!sock_queue_rcv_skb(sk, skb)) - goto done; - break; - - case L2CAP_MODE_ERTM: - if (!sock_owned_by_user(sk)) { - l2cap_ertm_data_rcv(sk, skb); - } else { - if (sk_add_backlog(sk, skb)) - goto drop; - } - - goto done; - - case L2CAP_MODE_STREAMING: - control = get_unaligned_le16(skb->data); - skb_pull(skb, 2); - len = skb->len; - - if (l2cap_check_fcs(pi, skb)) - goto drop; - - if (__is_sar_start(control)) - len -= 2; - - if (pi->fcs == L2CAP_FCS_CRC16) - len -= 2; - - if (len > pi->mps || len < 0 || __is_sframe(control)) - goto drop; - - tx_seq = __get_txseq(control); - - if (pi->expected_tx_seq == tx_seq) - pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64; - else - pi->expected_tx_seq = (tx_seq + 1) % 64; - - l2cap_streaming_reassembly_sdu(sk, skb, control); - - goto done; - - default: - BT_DBG("sk %p: bad mode 0x%2.2x", sk, pi->mode); - break; - } - -drop: - kfree_skb(skb); - -done: - if (sk) - bh_unlock_sock(sk); - - return 0; -} - -static inline int l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, struct sk_buff *skb) -{ - struct sock *sk; - - sk = l2cap_get_sock_by_psm(0, psm, conn->src); - if (!sk) - goto drop; - - bh_lock_sock(sk); - - BT_DBG("sk %p, len %d", sk, skb->len); - - if (sk->sk_state != BT_BOUND && sk->sk_state != BT_CONNECTED) - goto drop; - - if (l2cap_pi(sk)->imtu < skb->len) - goto drop; - - if (!sock_queue_rcv_skb(sk, skb)) - goto done; - -drop: - kfree_skb(skb); - -done: - if (sk) - bh_unlock_sock(sk); - return 0; -} - -static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) -{ - struct l2cap_hdr *lh = (void *) skb->data; - u16 cid, len; - __le16 psm; - - skb_pull(skb, L2CAP_HDR_SIZE); - cid = __le16_to_cpu(lh->cid); - len = __le16_to_cpu(lh->len); - - if (len != skb->len) { - kfree_skb(skb); - return; - } - - BT_DBG("len %d, cid 0x%4.4x", len, cid); - - switch (cid) { - case L2CAP_CID_SIGNALING: - l2cap_sig_channel(conn, skb); - break; - - case 
L2CAP_CID_CONN_LESS: - psm = get_unaligned_le16(skb->data); - skb_pull(skb, 2); - l2cap_conless_channel(conn, psm, skb); - break; - - default: - l2cap_data_channel(conn, cid, skb); - break; - } -} - -/* ---- L2CAP interface with lower layer (HCI) ---- */ - -static int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) -{ - int exact = 0, lm1 = 0, lm2 = 0; - register struct sock *sk; - struct hlist_node *node; - - if (type != ACL_LINK) - return -EINVAL; - - BT_DBG("hdev %s, bdaddr %s", hdev->name, batostr(bdaddr)); - - /* Find listening sockets and check their link_mode */ - read_lock(&l2cap_sk_list.lock); - sk_for_each(sk, node, &l2cap_sk_list.head) { - if (sk->sk_state != BT_LISTEN) - continue; - - if (!bacmp(&bt_sk(sk)->src, &hdev->bdaddr)) { - lm1 |= HCI_LM_ACCEPT; - if (l2cap_pi(sk)->role_switch) - lm1 |= HCI_LM_MASTER; - exact++; - } else if (!bacmp(&bt_sk(sk)->src, BDADDR_ANY)) { - lm2 |= HCI_LM_ACCEPT; - if (l2cap_pi(sk)->role_switch) - lm2 |= HCI_LM_MASTER; - } - } - read_unlock(&l2cap_sk_list.lock); - - return exact ? 
lm1 : lm2; -} - -static int l2cap_connect_cfm(struct hci_conn *hcon, u8 status) -{ - struct l2cap_conn *conn; - - BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status); - - if (hcon->type != ACL_LINK) - return -EINVAL; - - if (!status) { - conn = l2cap_conn_add(hcon, status); - if (conn) - l2cap_conn_ready(conn); - } else - l2cap_conn_del(hcon, bt_err(status)); - - return 0; -} - -static int l2cap_disconn_ind(struct hci_conn *hcon) -{ - struct l2cap_conn *conn = hcon->l2cap_data; - - BT_DBG("hcon %p", hcon); - - if (hcon->type != ACL_LINK || !conn) - return 0x13; - - return conn->disc_reason; -} - -static int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) -{ - BT_DBG("hcon %p reason %d", hcon, reason); - - if (hcon->type != ACL_LINK) - return -EINVAL; - - l2cap_conn_del(hcon, bt_err(reason)); - - return 0; -} - -static inline void l2cap_check_encryption(struct sock *sk, u8 encrypt) -{ - if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM) - return; - - if (encrypt == 0x00) { - if (l2cap_pi(sk)->sec_level == BT_SECURITY_MEDIUM) { - l2cap_sock_clear_timer(sk); - l2cap_sock_set_timer(sk, HZ * 5); - } else if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH) - __l2cap_sock_close(sk, ECONNREFUSED); - } else { - if (l2cap_pi(sk)->sec_level == BT_SECURITY_MEDIUM) - l2cap_sock_clear_timer(sk); - } -} - -static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) -{ - struct l2cap_chan_list *l; - struct l2cap_conn *conn = hcon->l2cap_data; - struct sock *sk; - - if (!conn) - return 0; - - l = &conn->chan_list; - - BT_DBG("conn %p", conn); - - read_lock(&l->lock); - - for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { - bh_lock_sock(sk); - - if (l2cap_pi(sk)->conf_state & L2CAP_CONF_CONNECT_PEND) { - bh_unlock_sock(sk); - continue; - } - - if (!status && (sk->sk_state == BT_CONNECTED || - sk->sk_state == BT_CONFIG)) { - l2cap_check_encryption(sk, encrypt); - bh_unlock_sock(sk); - continue; - } - - if (sk->sk_state == 
BT_CONNECT) { - if (!status) { - struct l2cap_conn_req req; - req.scid = cpu_to_le16(l2cap_pi(sk)->scid); - req.psm = l2cap_pi(sk)->psm; - - l2cap_pi(sk)->ident = l2cap_get_ident(conn); - l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND; - - l2cap_send_cmd(conn, l2cap_pi(sk)->ident, - L2CAP_CONN_REQ, sizeof(req), &req); - } else { - l2cap_sock_clear_timer(sk); - l2cap_sock_set_timer(sk, HZ / 10); - } - } else if (sk->sk_state == BT_CONNECT2) { - struct l2cap_conn_rsp rsp; - __u16 result; - - if (!status) { - sk->sk_state = BT_CONFIG; - result = L2CAP_CR_SUCCESS; - } else { - sk->sk_state = BT_DISCONN; - l2cap_sock_set_timer(sk, HZ / 10); - result = L2CAP_CR_SEC_BLOCK; - } - - rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); - rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); - rsp.result = cpu_to_le16(result); - rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); - l2cap_send_cmd(conn, l2cap_pi(sk)->ident, - L2CAP_CONN_RSP, sizeof(rsp), &rsp); - } - - bh_unlock_sock(sk); - } - - read_unlock(&l->lock); - - return 0; -} - -static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) -{ - struct l2cap_conn *conn = hcon->l2cap_data; - - if (!conn) - conn = l2cap_conn_add(hcon, 0); - - if (!conn) - goto drop; - - BT_DBG("conn %p len %d flags 0x%x", conn, skb->len, flags); - - if (!(flags & ACL_CONT)) { - struct l2cap_hdr *hdr; - struct sock *sk; - u16 cid; - int len; - - if (conn->rx_len) { - BT_ERR("Unexpected start frame (len %d)", skb->len); - kfree_skb(conn->rx_skb); - conn->rx_skb = NULL; - conn->rx_len = 0; - l2cap_conn_unreliable(conn, ECOMM); - } - - /* Start fragment always begin with Basic L2CAP header */ - if (skb->len < L2CAP_HDR_SIZE) { - BT_ERR("Frame is too short (len %d)", skb->len); - l2cap_conn_unreliable(conn, ECOMM); - goto drop; - } - - hdr = (struct l2cap_hdr *) skb->data; - len = __le16_to_cpu(hdr->len) + L2CAP_HDR_SIZE; - cid = __le16_to_cpu(hdr->cid); - - if (len == skb->len) { - /* Complete frame received */ - l2cap_recv_frame(conn, 
skb); - return 0; - } - - BT_DBG("Start: total len %d, frag len %d", len, skb->len); - - if (skb->len > len) { - BT_ERR("Frame is too long (len %d, expected len %d)", - skb->len, len); - l2cap_conn_unreliable(conn, ECOMM); - goto drop; - } - - sk = l2cap_get_chan_by_scid(&conn->chan_list, cid); - - if (sk && l2cap_pi(sk)->imtu < len - L2CAP_HDR_SIZE) { - BT_ERR("Frame exceeding recv MTU (len %d, MTU %d)", - len, l2cap_pi(sk)->imtu); - bh_unlock_sock(sk); - l2cap_conn_unreliable(conn, ECOMM); - goto drop; - } - - if (sk) - bh_unlock_sock(sk); - - /* Allocate skb for the complete frame (with header) */ - conn->rx_skb = bt_skb_alloc(len, GFP_ATOMIC); - if (!conn->rx_skb) - goto drop; - - skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len), - skb->len); - conn->rx_len = len - skb->len; - } else { - BT_DBG("Cont: frag len %d (expecting %d)", skb->len, conn->rx_len); - - if (!conn->rx_len) { - BT_ERR("Unexpected continuation frame (len %d)", skb->len); - l2cap_conn_unreliable(conn, ECOMM); - goto drop; - } - - if (skb->len > conn->rx_len) { - BT_ERR("Fragment is too long (len %d, expected %d)", - skb->len, conn->rx_len); - kfree_skb(conn->rx_skb); - conn->rx_skb = NULL; - conn->rx_len = 0; - l2cap_conn_unreliable(conn, ECOMM); - goto drop; - } - - skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len), - skb->len); - conn->rx_len -= skb->len; - - if (!conn->rx_len) { - /* Complete frame received */ - l2cap_recv_frame(conn, conn->rx_skb); - conn->rx_skb = NULL; - } - } - -drop: - kfree_skb(skb); - return 0; -} - -static int l2cap_debugfs_show(struct seq_file *f, void *p) -{ - struct sock *sk; - struct hlist_node *node; - - read_lock_bh(&l2cap_sk_list.lock); - - sk_for_each(sk, node, &l2cap_sk_list.head) { - struct l2cap_pinfo *pi = l2cap_pi(sk); - - seq_printf(f, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d %d\n", - batostr(&bt_sk(sk)->src), - batostr(&bt_sk(sk)->dst), - sk->sk_state, __le16_to_cpu(pi->psm), - pi->scid, pi->dcid, - pi->imtu, pi->omtu, 
pi->sec_level); - } - - read_unlock_bh(&l2cap_sk_list.lock); - - return 0; -} - -static int l2cap_debugfs_open(struct inode *inode, struct file *file) -{ - return single_open(file, l2cap_debugfs_show, inode->i_private); -} - -static const struct file_operations l2cap_debugfs_fops = { - .open = l2cap_debugfs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static struct dentry *l2cap_debugfs; - -static const struct proto_ops l2cap_sock_ops = { - .family = PF_BLUETOOTH, - .owner = THIS_MODULE, - .release = l2cap_sock_release, - .bind = l2cap_sock_bind, - .connect = l2cap_sock_connect, - .listen = l2cap_sock_listen, - .accept = l2cap_sock_accept, - .getname = l2cap_sock_getname, - .sendmsg = l2cap_sock_sendmsg, - .recvmsg = l2cap_sock_recvmsg, - .poll = bt_sock_poll, - .ioctl = bt_sock_ioctl, - .mmap = sock_no_mmap, - .socketpair = sock_no_socketpair, - .shutdown = l2cap_sock_shutdown, - .setsockopt = l2cap_sock_setsockopt, - .getsockopt = l2cap_sock_getsockopt -}; - -static const struct net_proto_family l2cap_sock_family_ops = { - .family = PF_BLUETOOTH, - .owner = THIS_MODULE, - .create = l2cap_sock_create, -}; - -static struct hci_proto l2cap_hci_proto = { - .name = "L2CAP", - .id = HCI_PROTO_L2CAP, - .connect_ind = l2cap_connect_ind, - .connect_cfm = l2cap_connect_cfm, - .disconn_ind = l2cap_disconn_ind, - .disconn_cfm = l2cap_disconn_cfm, - .security_cfm = l2cap_security_cfm, - .recv_acldata = l2cap_recv_acldata -}; - -static int __init l2cap_init(void) -{ - int err; - - err = proto_register(&l2cap_proto, 0); - if (err < 0) - return err; - - _busy_wq = create_singlethread_workqueue("l2cap"); - if (!_busy_wq) { - proto_unregister(&l2cap_proto); - return -ENOMEM; - } - - err = bt_sock_register(BTPROTO_L2CAP, &l2cap_sock_family_ops); - if (err < 0) { - BT_ERR("L2CAP socket registration failed"); - goto error; - } - - err = hci_register_proto(&l2cap_hci_proto); - if (err < 0) { - BT_ERR("L2CAP protocol registration failed"); - 
bt_sock_unregister(BTPROTO_L2CAP); - goto error; - } - - if (bt_debugfs) { - l2cap_debugfs = debugfs_create_file("l2cap", 0444, - bt_debugfs, NULL, &l2cap_debugfs_fops); - if (!l2cap_debugfs) - BT_ERR("Failed to create L2CAP debug file"); - } - - BT_INFO("L2CAP ver %s", VERSION); - BT_INFO("L2CAP socket layer initialized"); - - return 0; - -error: - destroy_workqueue(_busy_wq); - proto_unregister(&l2cap_proto); - return err; -} - -static void __exit l2cap_exit(void) -{ - debugfs_remove(l2cap_debugfs); - - flush_workqueue(_busy_wq); - destroy_workqueue(_busy_wq); - - if (bt_sock_unregister(BTPROTO_L2CAP) < 0) - BT_ERR("L2CAP socket unregistration failed"); - - if (hci_unregister_proto(&l2cap_hci_proto) < 0) - BT_ERR("L2CAP protocol unregistration failed"); - - proto_unregister(&l2cap_proto); -} - -void l2cap_load(void) -{ - /* Dummy function to trigger automatic L2CAP module loading by - * other modules that use L2CAP sockets but don't use any other - * symbols from it. */ -} -EXPORT_SYMBOL(l2cap_load); - -module_init(l2cap_init); -module_exit(l2cap_exit); - -module_param(disable_ertm, bool, 0644); -MODULE_PARM_DESC(disable_ertm, "Disable enhanced retransmission mode"); - -MODULE_AUTHOR("Marcel Holtmann "); -MODULE_DESCRIPTION("Bluetooth L2CAP ver " VERSION); -MODULE_VERSION(VERSION); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("bt-proto-0"); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c new file mode 100644 index 0000000..28d2954 --- /dev/null +++ b/net/bluetooth/l2cap_core.c @@ -0,0 +1,4984 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + Copyright (C) 2000-2001 Qualcomm Incorporated + Copyright (C) 2009-2010 Gustavo F. Padovan + Copyright (C) 2010 Google Inc. 
+ + Written 2000,2001 by Maxim Krasnyansky + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +/* Bluetooth L2CAP core and sockets. 
*/ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#define VERSION "2.15" + +static int disable_ertm; + +static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN; +static u8 l2cap_fixed_chan[8] = { 0x02, }; + +static const struct proto_ops l2cap_sock_ops; + +static struct workqueue_struct *_busy_wq; + +static struct bt_sock_list l2cap_sk_list = { + .lock = __RW_LOCK_UNLOCKED(l2cap_sk_list.lock) +}; + +static void l2cap_busy_work(struct work_struct *work); + +static void __l2cap_sock_close(struct sock *sk, int reason); +static void l2cap_sock_close(struct sock *sk); +static void l2cap_sock_kill(struct sock *sk); + +static int l2cap_build_conf_req(struct sock *sk, void *data); +static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, + u8 code, u8 ident, u16 dlen, void *data); + +static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb); + +/* ---- L2CAP timers ---- */ +static void l2cap_sock_set_timer(struct sock *sk, long timeout) +{ + BT_DBG("sk %p state %d timeout %ld", sk, sk->sk_state, timeout); + sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout); +} + +static void l2cap_sock_clear_timer(struct sock *sk) +{ + BT_DBG("sock %p state %d", sk, sk->sk_state); + sk_stop_timer(sk, &sk->sk_timer); +} + +static void l2cap_sock_timeout(unsigned long arg) +{ + struct sock *sk = (struct sock *) arg; + int reason; + + BT_DBG("sock %p state %d", sk, sk->sk_state); + + bh_lock_sock(sk); + + if (sock_owned_by_user(sk)) { + /* sk is owned by user. 
Try again later */ + l2cap_sock_set_timer(sk, HZ / 5); + bh_unlock_sock(sk); + sock_put(sk); + return; + } + + if (sk->sk_state == BT_CONNECTED || sk->sk_state == BT_CONFIG) + reason = ECONNREFUSED; + else if (sk->sk_state == BT_CONNECT && + l2cap_pi(sk)->sec_level != BT_SECURITY_SDP) + reason = ECONNREFUSED; + else + reason = ETIMEDOUT; + + __l2cap_sock_close(sk, reason); + + bh_unlock_sock(sk); + + l2cap_sock_kill(sk); + sock_put(sk); +} + +/* ---- L2CAP channels ---- */ +static struct sock *__l2cap_get_chan_by_dcid(struct l2cap_chan_list *l, u16 cid) +{ + struct sock *s; + for (s = l->head; s; s = l2cap_pi(s)->next_c) { + if (l2cap_pi(s)->dcid == cid) + break; + } + return s; +} + +static struct sock *__l2cap_get_chan_by_scid(struct l2cap_chan_list *l, u16 cid) +{ + struct sock *s; + for (s = l->head; s; s = l2cap_pi(s)->next_c) { + if (l2cap_pi(s)->scid == cid) + break; + } + return s; +} + +/* Find channel with given SCID. + * Returns locked socket */ +static inline struct sock *l2cap_get_chan_by_scid(struct l2cap_chan_list *l, u16 cid) +{ + struct sock *s; + read_lock(&l->lock); + s = __l2cap_get_chan_by_scid(l, cid); + if (s) + bh_lock_sock(s); + read_unlock(&l->lock); + return s; +} + +static struct sock *__l2cap_get_chan_by_ident(struct l2cap_chan_list *l, u8 ident) +{ + struct sock *s; + for (s = l->head; s; s = l2cap_pi(s)->next_c) { + if (l2cap_pi(s)->ident == ident) + break; + } + return s; +} + +static inline struct sock *l2cap_get_chan_by_ident(struct l2cap_chan_list *l, u8 ident) +{ + struct sock *s; + read_lock(&l->lock); + s = __l2cap_get_chan_by_ident(l, ident); + if (s) + bh_lock_sock(s); + read_unlock(&l->lock); + return s; +} + +static u16 l2cap_alloc_cid(struct l2cap_chan_list *l) +{ + u16 cid = L2CAP_CID_DYN_START; + + for (; cid < L2CAP_CID_DYN_END; cid++) { + if (!__l2cap_get_chan_by_scid(l, cid)) + return cid; + } + + return 0; +} + +static inline void __l2cap_chan_link(struct l2cap_chan_list *l, struct sock *sk) +{ + sock_hold(sk); + + 
if (l->head) + l2cap_pi(l->head)->prev_c = sk; + + l2cap_pi(sk)->next_c = l->head; + l2cap_pi(sk)->prev_c = NULL; + l->head = sk; +} + +static inline void l2cap_chan_unlink(struct l2cap_chan_list *l, struct sock *sk) +{ + struct sock *next = l2cap_pi(sk)->next_c, *prev = l2cap_pi(sk)->prev_c; + + write_lock_bh(&l->lock); + if (sk == l->head) + l->head = next; + + if (next) + l2cap_pi(next)->prev_c = prev; + if (prev) + l2cap_pi(prev)->next_c = next; + write_unlock_bh(&l->lock); + + __sock_put(sk); +} + +static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct sock *parent) +{ + struct l2cap_chan_list *l = &conn->chan_list; + + BT_DBG("conn %p, psm 0x%2.2x, dcid 0x%4.4x", conn, + l2cap_pi(sk)->psm, l2cap_pi(sk)->dcid); + + conn->disc_reason = 0x13; + + l2cap_pi(sk)->conn = conn; + + if (sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM) { + /* Alloc CID for connection-oriented socket */ + l2cap_pi(sk)->scid = l2cap_alloc_cid(l); + } else if (sk->sk_type == SOCK_DGRAM) { + /* Connectionless socket */ + l2cap_pi(sk)->scid = L2CAP_CID_CONN_LESS; + l2cap_pi(sk)->dcid = L2CAP_CID_CONN_LESS; + l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU; + } else { + /* Raw socket can send/recv signalling messages only */ + l2cap_pi(sk)->scid = L2CAP_CID_SIGNALING; + l2cap_pi(sk)->dcid = L2CAP_CID_SIGNALING; + l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU; + } + + __l2cap_chan_link(l, sk); + + if (parent) + bt_accept_enqueue(parent, sk); +} + +/* Delete channel. + * Must be called on the locked socket. 
*/ +static void l2cap_chan_del(struct sock *sk, int err) +{ + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + struct sock *parent = bt_sk(sk)->parent; + + l2cap_sock_clear_timer(sk); + + BT_DBG("sk %p, conn %p, err %d", sk, conn, err); + + if (conn) { + /* Unlink from channel list */ + l2cap_chan_unlink(&conn->chan_list, sk); + l2cap_pi(sk)->conn = NULL; + hci_conn_put(conn->hcon); + } + + sk->sk_state = BT_CLOSED; + sock_set_flag(sk, SOCK_ZAPPED); + + if (err) + sk->sk_err = err; + + if (parent) { + bt_accept_unlink(sk); + parent->sk_data_ready(parent, 0); + } else + sk->sk_state_change(sk); + + skb_queue_purge(TX_QUEUE(sk)); + + if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) { + struct srej_list *l, *tmp; + + del_timer(&l2cap_pi(sk)->retrans_timer); + del_timer(&l2cap_pi(sk)->monitor_timer); + del_timer(&l2cap_pi(sk)->ack_timer); + + skb_queue_purge(SREJ_QUEUE(sk)); + skb_queue_purge(BUSY_QUEUE(sk)); + + list_for_each_entry_safe(l, tmp, SREJ_LIST(sk), list) { + list_del(&l->list); + kfree(l); + } + } +} + +static inline u8 l2cap_get_auth_type(struct sock *sk) +{ + if (sk->sk_type == SOCK_RAW) { + switch (l2cap_pi(sk)->sec_level) { + case BT_SECURITY_HIGH: + return HCI_AT_DEDICATED_BONDING_MITM; + case BT_SECURITY_MEDIUM: + return HCI_AT_DEDICATED_BONDING; + default: + return HCI_AT_NO_BONDING; + } + } else if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) { + if (l2cap_pi(sk)->sec_level == BT_SECURITY_LOW) + l2cap_pi(sk)->sec_level = BT_SECURITY_SDP; + + if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH) + return HCI_AT_NO_BONDING_MITM; + else + return HCI_AT_NO_BONDING; + } else { + switch (l2cap_pi(sk)->sec_level) { + case BT_SECURITY_HIGH: + return HCI_AT_GENERAL_BONDING_MITM; + case BT_SECURITY_MEDIUM: + return HCI_AT_GENERAL_BONDING; + default: + return HCI_AT_NO_BONDING; + } + } +} + +/* Service level security */ +static inline int l2cap_check_security(struct sock *sk) +{ + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + __u8 auth_type; + + auth_type = 
l2cap_get_auth_type(sk); + + return hci_conn_security(conn->hcon, l2cap_pi(sk)->sec_level, + auth_type); +} + +static inline u8 l2cap_get_ident(struct l2cap_conn *conn) +{ + u8 id; + + /* Get next available identificator. + * 1 - 128 are used by kernel. + * 129 - 199 are reserved. + * 200 - 254 are used by utilities like l2ping, etc. + */ + + spin_lock_bh(&conn->lock); + + if (++conn->tx_ident > 128) + conn->tx_ident = 1; + + id = conn->tx_ident; + + spin_unlock_bh(&conn->lock); + + return id; +} + +static inline void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data) +{ + struct sk_buff *skb = l2cap_build_cmd(conn, code, ident, len, data); + u8 flags; + + BT_DBG("code 0x%2.2x", code); + + if (!skb) + return; + + if (lmp_no_flush_capable(conn->hcon->hdev)) + flags = ACL_START_NO_FLUSH; + else + flags = ACL_START; + + hci_send_acl(conn->hcon, skb, flags); +} + +static inline void l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control) +{ + struct sk_buff *skb; + struct l2cap_hdr *lh; + struct l2cap_conn *conn = pi->conn; + struct sock *sk = (struct sock *)pi; + int count, hlen = L2CAP_HDR_SIZE + 2; + u8 flags; + + if (sk->sk_state != BT_CONNECTED) + return; + + if (pi->fcs == L2CAP_FCS_CRC16) + hlen += 2; + + BT_DBG("pi %p, control 0x%2.2x", pi, control); + + count = min_t(unsigned int, conn->mtu, hlen); + control |= L2CAP_CTRL_FRAME_TYPE; + + if (pi->conn_state & L2CAP_CONN_SEND_FBIT) { + control |= L2CAP_CTRL_FINAL; + pi->conn_state &= ~L2CAP_CONN_SEND_FBIT; + } + + if (pi->conn_state & L2CAP_CONN_SEND_PBIT) { + control |= L2CAP_CTRL_POLL; + pi->conn_state &= ~L2CAP_CONN_SEND_PBIT; + } + + skb = bt_skb_alloc(count, GFP_ATOMIC); + if (!skb) + return; + + lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); + lh->len = cpu_to_le16(hlen - L2CAP_HDR_SIZE); + lh->cid = cpu_to_le16(pi->dcid); + put_unaligned_le16(control, skb_put(skb, 2)); + + if (pi->fcs == L2CAP_FCS_CRC16) { + u16 fcs = crc16(0, (u8 *)lh, count - 2); + 
put_unaligned_le16(fcs, skb_put(skb, 2)); + } + + if (lmp_no_flush_capable(conn->hcon->hdev)) + flags = ACL_START_NO_FLUSH; + else + flags = ACL_START; + + hci_send_acl(pi->conn->hcon, skb, flags); +} + +static inline void l2cap_send_rr_or_rnr(struct l2cap_pinfo *pi, u16 control) +{ + if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) { + control |= L2CAP_SUPER_RCV_NOT_READY; + pi->conn_state |= L2CAP_CONN_RNR_SENT; + } else + control |= L2CAP_SUPER_RCV_READY; + + control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; + + l2cap_send_sframe(pi, control); +} + +static inline int __l2cap_no_conn_pending(struct sock *sk) +{ + return !(l2cap_pi(sk)->conf_state & L2CAP_CONF_CONNECT_PEND); +} + +static void l2cap_do_start(struct sock *sk) +{ + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + + if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) { + if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE)) + return; + + if (l2cap_check_security(sk) && __l2cap_no_conn_pending(sk)) { + struct l2cap_conn_req req; + req.scid = cpu_to_le16(l2cap_pi(sk)->scid); + req.psm = l2cap_pi(sk)->psm; + + l2cap_pi(sk)->ident = l2cap_get_ident(conn); + l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND; + + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + L2CAP_CONN_REQ, sizeof(req), &req); + } + } else { + struct l2cap_info_req req; + req.type = cpu_to_le16(L2CAP_IT_FEAT_MASK); + + conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT; + conn->info_ident = l2cap_get_ident(conn); + + mod_timer(&conn->info_timer, jiffies + + msecs_to_jiffies(L2CAP_INFO_TIMEOUT)); + + l2cap_send_cmd(conn, conn->info_ident, + L2CAP_INFO_REQ, sizeof(req), &req); + } +} + +static inline int l2cap_mode_supported(__u8 mode, __u32 feat_mask) +{ + u32 local_feat_mask = l2cap_feat_mask; + if (!disable_ertm) + local_feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING; + + switch (mode) { + case L2CAP_MODE_ERTM: + return L2CAP_FEAT_ERTM & feat_mask & local_feat_mask; + case L2CAP_MODE_STREAMING: + return L2CAP_FEAT_STREAMING & 
feat_mask & local_feat_mask; + default: + return 0x00; + } +} + +static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk, int err) +{ + struct l2cap_disconn_req req; + + if (!conn) + return; + + skb_queue_purge(TX_QUEUE(sk)); + + if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) { + del_timer(&l2cap_pi(sk)->retrans_timer); + del_timer(&l2cap_pi(sk)->monitor_timer); + del_timer(&l2cap_pi(sk)->ack_timer); + } + + req.dcid = cpu_to_le16(l2cap_pi(sk)->dcid); + req.scid = cpu_to_le16(l2cap_pi(sk)->scid); + l2cap_send_cmd(conn, l2cap_get_ident(conn), + L2CAP_DISCONN_REQ, sizeof(req), &req); + + sk->sk_state = BT_DISCONN; + sk->sk_err = err; +} + +/* ---- L2CAP connections ---- */ +static void l2cap_conn_start(struct l2cap_conn *conn) +{ + struct l2cap_chan_list *l = &conn->chan_list; + struct sock_del_list del, *tmp1, *tmp2; + struct sock *sk; + + BT_DBG("conn %p", conn); + + INIT_LIST_HEAD(&del.list); + + read_lock(&l->lock); + + for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { + bh_lock_sock(sk); + + if (sk->sk_type != SOCK_SEQPACKET && + sk->sk_type != SOCK_STREAM) { + bh_unlock_sock(sk); + continue; + } + + if (sk->sk_state == BT_CONNECT) { + struct l2cap_conn_req req; + + if (!l2cap_check_security(sk) || + !__l2cap_no_conn_pending(sk)) { + bh_unlock_sock(sk); + continue; + } + + if (!l2cap_mode_supported(l2cap_pi(sk)->mode, + conn->feat_mask) + && l2cap_pi(sk)->conf_state & + L2CAP_CONF_STATE2_DEVICE) { + tmp1 = kzalloc(sizeof(struct sock_del_list), + GFP_ATOMIC); + tmp1->sk = sk; + list_add_tail(&tmp1->list, &del.list); + bh_unlock_sock(sk); + continue; + } + + req.scid = cpu_to_le16(l2cap_pi(sk)->scid); + req.psm = l2cap_pi(sk)->psm; + + l2cap_pi(sk)->ident = l2cap_get_ident(conn); + l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND; + + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + L2CAP_CONN_REQ, sizeof(req), &req); + + } else if (sk->sk_state == BT_CONNECT2) { + struct l2cap_conn_rsp rsp; + char buf[128]; + rsp.scid = 
cpu_to_le16(l2cap_pi(sk)->dcid); + rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); + + if (l2cap_check_security(sk)) { + if (bt_sk(sk)->defer_setup) { + struct sock *parent = bt_sk(sk)->parent; + rsp.result = cpu_to_le16(L2CAP_CR_PEND); + rsp.status = cpu_to_le16(L2CAP_CS_AUTHOR_PEND); + parent->sk_data_ready(parent, 0); + + } else { + sk->sk_state = BT_CONFIG; + rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS); + rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); + } + } else { + rsp.result = cpu_to_le16(L2CAP_CR_PEND); + rsp.status = cpu_to_le16(L2CAP_CS_AUTHEN_PEND); + } + + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + L2CAP_CONN_RSP, sizeof(rsp), &rsp); + + if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT || + rsp.result != L2CAP_CR_SUCCESS) { + bh_unlock_sock(sk); + continue; + } + + l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; + l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, + l2cap_build_conf_req(sk, buf), buf); + l2cap_pi(sk)->num_conf_req++; + } + + bh_unlock_sock(sk); + } + + read_unlock(&l->lock); + + list_for_each_entry_safe(tmp1, tmp2, &del.list, list) { + bh_lock_sock(tmp1->sk); + __l2cap_sock_close(tmp1->sk, ECONNRESET); + bh_unlock_sock(tmp1->sk); + list_del(&tmp1->list); + kfree(tmp1); + } +} + +static void l2cap_conn_ready(struct l2cap_conn *conn) +{ + struct l2cap_chan_list *l = &conn->chan_list; + struct sock *sk; + + BT_DBG("conn %p", conn); + + read_lock(&l->lock); + + for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { + bh_lock_sock(sk); + + if (sk->sk_type != SOCK_SEQPACKET && + sk->sk_type != SOCK_STREAM) { + l2cap_sock_clear_timer(sk); + sk->sk_state = BT_CONNECTED; + sk->sk_state_change(sk); + } else if (sk->sk_state == BT_CONNECT) + l2cap_do_start(sk); + + bh_unlock_sock(sk); + } + + read_unlock(&l->lock); +} + +/* Notify sockets that we cannot guaranty reliability anymore */ +static void l2cap_conn_unreliable(struct l2cap_conn *conn, int err) +{ + struct l2cap_chan_list *l = &conn->chan_list; + struct sock *sk; + + BT_DBG("conn %p", 
conn); + + read_lock(&l->lock); + + for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { + if (l2cap_pi(sk)->force_reliable) + sk->sk_err = err; + } + + read_unlock(&l->lock); +} + +static void l2cap_info_timeout(unsigned long arg) +{ + struct l2cap_conn *conn = (void *) arg; + + conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; + conn->info_ident = 0; + + l2cap_conn_start(conn); +} + +static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) +{ + struct l2cap_conn *conn = hcon->l2cap_data; + + if (conn || status) + return conn; + + conn = kzalloc(sizeof(struct l2cap_conn), GFP_ATOMIC); + if (!conn) + return NULL; + + hcon->l2cap_data = conn; + conn->hcon = hcon; + + BT_DBG("hcon %p conn %p", hcon, conn); + + conn->mtu = hcon->hdev->acl_mtu; + conn->src = &hcon->hdev->bdaddr; + conn->dst = &hcon->dst; + + conn->feat_mask = 0; + + spin_lock_init(&conn->lock); + rwlock_init(&conn->chan_list.lock); + + setup_timer(&conn->info_timer, l2cap_info_timeout, + (unsigned long) conn); + + conn->disc_reason = 0x13; + + return conn; +} + +static void l2cap_conn_del(struct hci_conn *hcon, int err) +{ + struct l2cap_conn *conn = hcon->l2cap_data; + struct sock *sk; + + if (!conn) + return; + + BT_DBG("hcon %p conn %p, err %d", hcon, conn, err); + + kfree_skb(conn->rx_skb); + + /* Kill channels */ + while ((sk = conn->chan_list.head)) { + bh_lock_sock(sk); + l2cap_chan_del(sk, err); + bh_unlock_sock(sk); + l2cap_sock_kill(sk); + } + + if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) + del_timer_sync(&conn->info_timer); + + hcon->l2cap_data = NULL; + kfree(conn); +} + +static inline void l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct sock *parent) +{ + struct l2cap_chan_list *l = &conn->chan_list; + write_lock_bh(&l->lock); + __l2cap_chan_add(conn, sk, parent); + write_unlock_bh(&l->lock); +} + +/* ---- Socket interface ---- */ +static struct sock *__l2cap_get_sock_by_addr(__le16 psm, bdaddr_t *src) +{ + struct sock *sk; + struct hlist_node 
*node; + sk_for_each(sk, node, &l2cap_sk_list.head) + if (l2cap_pi(sk)->sport == psm && !bacmp(&bt_sk(sk)->src, src)) + goto found; + sk = NULL; +found: + return sk; +} + +/* Find socket with psm and source bdaddr. + * Returns closest match. + */ +static struct sock *l2cap_get_sock_by_psm(int state, __le16 psm, bdaddr_t *src) +{ + struct sock *sk = NULL, *sk1 = NULL; + struct hlist_node *node; + + read_lock(&l2cap_sk_list.lock); + + sk_for_each(sk, node, &l2cap_sk_list.head) { + if (state && sk->sk_state != state) + continue; + + if (l2cap_pi(sk)->psm == psm) { + /* Exact match. */ + if (!bacmp(&bt_sk(sk)->src, src)) + break; + + /* Closest match */ + if (!bacmp(&bt_sk(sk)->src, BDADDR_ANY)) + sk1 = sk; + } + } + + read_unlock(&l2cap_sk_list.lock); + + return node ? sk : sk1; +} + +static void l2cap_sock_destruct(struct sock *sk) +{ + BT_DBG("sk %p", sk); + + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&sk->sk_write_queue); +} + +static void l2cap_sock_cleanup_listen(struct sock *parent) +{ + struct sock *sk; + + BT_DBG("parent %p", parent); + + /* Close not yet accepted channels */ + while ((sk = bt_accept_dequeue(parent, NULL))) + l2cap_sock_close(sk); + + parent->sk_state = BT_CLOSED; + sock_set_flag(parent, SOCK_ZAPPED); +} + +/* Kill socket (only if zapped and orphan) + * Must be called on unlocked socket. 
+ */ +static void l2cap_sock_kill(struct sock *sk) +{ + if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket) + return; + + BT_DBG("sk %p state %d", sk, sk->sk_state); + + /* Kill poor orphan */ + bt_sock_unlink(&l2cap_sk_list, sk); + sock_set_flag(sk, SOCK_DEAD); + sock_put(sk); +} + +static void __l2cap_sock_close(struct sock *sk, int reason) +{ + BT_DBG("sk %p state %d socket %p", sk, sk->sk_state, sk->sk_socket); + + switch (sk->sk_state) { + case BT_LISTEN: + l2cap_sock_cleanup_listen(sk); + break; + + case BT_CONNECTED: + case BT_CONFIG: + if (sk->sk_type == SOCK_SEQPACKET || + sk->sk_type == SOCK_STREAM) { + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + + l2cap_sock_set_timer(sk, sk->sk_sndtimeo); + l2cap_send_disconn_req(conn, sk, reason); + } else + l2cap_chan_del(sk, reason); + break; + + case BT_CONNECT2: + if (sk->sk_type == SOCK_SEQPACKET || + sk->sk_type == SOCK_STREAM) { + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + struct l2cap_conn_rsp rsp; + __u16 result; + + if (bt_sk(sk)->defer_setup) + result = L2CAP_CR_SEC_BLOCK; + else + result = L2CAP_CR_BAD_PSM; + sk->sk_state = BT_DISCONN; + + rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); + rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); + rsp.result = cpu_to_le16(result); + rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + L2CAP_CONN_RSP, sizeof(rsp), &rsp); + } else + l2cap_chan_del(sk, reason); + break; + + case BT_CONNECT: + case BT_DISCONN: + l2cap_chan_del(sk, reason); + break; + + default: + sock_set_flag(sk, SOCK_ZAPPED); + break; + } +} + +/* Must be called on unlocked socket. 
*/ +static void l2cap_sock_close(struct sock *sk) +{ + l2cap_sock_clear_timer(sk); + lock_sock(sk); + __l2cap_sock_close(sk, ECONNRESET); + release_sock(sk); + l2cap_sock_kill(sk); +} + +static void l2cap_sock_init(struct sock *sk, struct sock *parent) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + + BT_DBG("sk %p", sk); + + if (parent) { + sk->sk_type = parent->sk_type; + bt_sk(sk)->defer_setup = bt_sk(parent)->defer_setup; + + pi->imtu = l2cap_pi(parent)->imtu; + pi->omtu = l2cap_pi(parent)->omtu; + pi->conf_state = l2cap_pi(parent)->conf_state; + pi->mode = l2cap_pi(parent)->mode; + pi->fcs = l2cap_pi(parent)->fcs; + pi->max_tx = l2cap_pi(parent)->max_tx; + pi->tx_win = l2cap_pi(parent)->tx_win; + pi->sec_level = l2cap_pi(parent)->sec_level; + pi->role_switch = l2cap_pi(parent)->role_switch; + pi->force_reliable = l2cap_pi(parent)->force_reliable; + pi->flushable = l2cap_pi(parent)->flushable; + } else { + pi->imtu = L2CAP_DEFAULT_MTU; + pi->omtu = 0; + if (!disable_ertm && sk->sk_type == SOCK_STREAM) { + pi->mode = L2CAP_MODE_ERTM; + pi->conf_state |= L2CAP_CONF_STATE2_DEVICE; + } else { + pi->mode = L2CAP_MODE_BASIC; + } + pi->max_tx = L2CAP_DEFAULT_MAX_TX; + pi->fcs = L2CAP_FCS_CRC16; + pi->tx_win = L2CAP_DEFAULT_TX_WINDOW; + pi->sec_level = BT_SECURITY_LOW; + pi->role_switch = 0; + pi->force_reliable = 0; + pi->flushable = BT_FLUSHABLE_OFF; + } + + /* Default config options */ + pi->conf_len = 0; + pi->flush_to = L2CAP_DEFAULT_FLUSH_TO; + skb_queue_head_init(TX_QUEUE(sk)); + skb_queue_head_init(SREJ_QUEUE(sk)); + skb_queue_head_init(BUSY_QUEUE(sk)); + INIT_LIST_HEAD(SREJ_LIST(sk)); +} + +static struct proto l2cap_proto = { + .name = "L2CAP", + .owner = THIS_MODULE, + .obj_size = sizeof(struct l2cap_pinfo) +}; + +static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) +{ + struct sock *sk; + + sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto); + if (!sk) + return NULL; + + sock_init_data(sock, sk); + 
INIT_LIST_HEAD(&bt_sk(sk)->accept_q); + + sk->sk_destruct = l2cap_sock_destruct; + sk->sk_sndtimeo = msecs_to_jiffies(L2CAP_CONN_TIMEOUT); + + sock_reset_flag(sk, SOCK_ZAPPED); + + sk->sk_protocol = proto; + sk->sk_state = BT_OPEN; + + setup_timer(&sk->sk_timer, l2cap_sock_timeout, (unsigned long) sk); + + bt_sock_link(&l2cap_sk_list, sk); + return sk; +} + +static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol, + int kern) +{ + struct sock *sk; + + BT_DBG("sock %p", sock); + + sock->state = SS_UNCONNECTED; + + if (sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM && + sock->type != SOCK_DGRAM && sock->type != SOCK_RAW) + return -ESOCKTNOSUPPORT; + + if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW)) + return -EPERM; + + sock->ops = &l2cap_sock_ops; + + sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC); + if (!sk) + return -ENOMEM; + + l2cap_sock_init(sk, NULL); + return 0; +} + +static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) +{ + struct sock *sk = sock->sk; + struct sockaddr_l2 la; + int len, err = 0; + + BT_DBG("sk %p", sk); + + if (!addr || addr->sa_family != AF_BLUETOOTH) + return -EINVAL; + + memset(&la, 0, sizeof(la)); + len = min_t(unsigned int, sizeof(la), alen); + memcpy(&la, addr, len); + + if (la.l2_cid) + return -EINVAL; + + lock_sock(sk); + + if (sk->sk_state != BT_OPEN) { + err = -EBADFD; + goto done; + } + + if (la.l2_psm) { + __u16 psm = __le16_to_cpu(la.l2_psm); + + /* PSM must be odd and lsb of upper byte must be 0 */ + if ((psm & 0x0101) != 0x0001) { + err = -EINVAL; + goto done; + } + + /* Restrict usage of well-known PSMs */ + if (psm < 0x1001 && !capable(CAP_NET_BIND_SERVICE)) { + err = -EACCES; + goto done; + } + } + + write_lock_bh(&l2cap_sk_list.lock); + + if (la.l2_psm && __l2cap_get_sock_by_addr(la.l2_psm, &la.l2_bdaddr)) { + err = -EADDRINUSE; + } else { + /* Save source address */ + bacpy(&bt_sk(sk)->src, &la.l2_bdaddr); + l2cap_pi(sk)->psm = la.l2_psm; 
+ l2cap_pi(sk)->sport = la.l2_psm; + sk->sk_state = BT_BOUND; + + if (__le16_to_cpu(la.l2_psm) == 0x0001 || + __le16_to_cpu(la.l2_psm) == 0x0003) + l2cap_pi(sk)->sec_level = BT_SECURITY_SDP; + } + + write_unlock_bh(&l2cap_sk_list.lock); + +done: + release_sock(sk); + return err; +} + +static int l2cap_do_connect(struct sock *sk) +{ + bdaddr_t *src = &bt_sk(sk)->src; + bdaddr_t *dst = &bt_sk(sk)->dst; + struct l2cap_conn *conn; + struct hci_conn *hcon; + struct hci_dev *hdev; + __u8 auth_type; + int err; + + BT_DBG("%s -> %s psm 0x%2.2x", batostr(src), batostr(dst), + l2cap_pi(sk)->psm); + + hdev = hci_get_route(dst, src); + if (!hdev) + return -EHOSTUNREACH; + + hci_dev_lock_bh(hdev); + + err = -ENOMEM; + + auth_type = l2cap_get_auth_type(sk); + + hcon = hci_connect(hdev, ACL_LINK, dst, + l2cap_pi(sk)->sec_level, auth_type); + if (!hcon) + goto done; + + conn = l2cap_conn_add(hcon, 0); + if (!conn) { + hci_conn_put(hcon); + goto done; + } + + err = 0; + + /* Update source addr of the socket */ + bacpy(src, conn->src); + + l2cap_chan_add(conn, sk, NULL); + + sk->sk_state = BT_CONNECT; + l2cap_sock_set_timer(sk, sk->sk_sndtimeo); + + if (hcon->state == BT_CONNECTED) { + if (sk->sk_type != SOCK_SEQPACKET && + sk->sk_type != SOCK_STREAM) { + l2cap_sock_clear_timer(sk); + if (l2cap_check_security(sk)) + sk->sk_state = BT_CONNECTED; + } else + l2cap_do_start(sk); + } + +done: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + return err; +} + +static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) +{ + struct sock *sk = sock->sk; + struct sockaddr_l2 la; + int len, err = 0; + + BT_DBG("sk %p", sk); + + if (!addr || alen < sizeof(addr->sa_family) || + addr->sa_family != AF_BLUETOOTH) + return -EINVAL; + + memset(&la, 0, sizeof(la)); + len = min_t(unsigned int, sizeof(la), alen); + memcpy(&la, addr, len); + + if (la.l2_cid) + return -EINVAL; + + lock_sock(sk); + + if ((sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM) + && 
!la.l2_psm) { + err = -EINVAL; + goto done; + } + + switch (l2cap_pi(sk)->mode) { + case L2CAP_MODE_BASIC: + break; + case L2CAP_MODE_ERTM: + case L2CAP_MODE_STREAMING: + if (!disable_ertm) + break; + /* fall through */ + default: + err = -ENOTSUPP; + goto done; + } + + switch (sk->sk_state) { + case BT_CONNECT: + case BT_CONNECT2: + case BT_CONFIG: + /* Already connecting */ + goto wait; + + case BT_CONNECTED: + /* Already connected */ + err = -EISCONN; + goto done; + + case BT_OPEN: + case BT_BOUND: + /* Can connect */ + break; + + default: + err = -EBADFD; + goto done; + } + + /* PSM must be odd and lsb of upper byte must be 0 */ + if ((__le16_to_cpu(la.l2_psm) & 0x0101) != 0x0001 && + sk->sk_type != SOCK_RAW) { + err = -EINVAL; + goto done; + } + + /* Set destination address and psm */ + bacpy(&bt_sk(sk)->dst, &la.l2_bdaddr); + l2cap_pi(sk)->psm = la.l2_psm; + + err = l2cap_do_connect(sk); + if (err) + goto done; + +wait: + err = bt_sock_wait_state(sk, BT_CONNECTED, + sock_sndtimeo(sk, flags & O_NONBLOCK)); +done: + release_sock(sk); + return err; +} + +static int l2cap_sock_listen(struct socket *sock, int backlog) +{ + struct sock *sk = sock->sk; + int err = 0; + + BT_DBG("sk %p backlog %d", sk, backlog); + + lock_sock(sk); + + if ((sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM) + || sk->sk_state != BT_BOUND) { + err = -EBADFD; + goto done; + } + + switch (l2cap_pi(sk)->mode) { + case L2CAP_MODE_BASIC: + break; + case L2CAP_MODE_ERTM: + case L2CAP_MODE_STREAMING: + if (!disable_ertm) + break; + /* fall through */ + default: + err = -ENOTSUPP; + goto done; + } + + if (!l2cap_pi(sk)->psm) { + bdaddr_t *src = &bt_sk(sk)->src; + u16 psm; + + err = -EINVAL; + + write_lock_bh(&l2cap_sk_list.lock); + + for (psm = 0x1001; psm < 0x1100; psm += 2) + if (!__l2cap_get_sock_by_addr(cpu_to_le16(psm), src)) { + l2cap_pi(sk)->psm = cpu_to_le16(psm); + l2cap_pi(sk)->sport = cpu_to_le16(psm); + err = 0; + break; + } + + write_unlock_bh(&l2cap_sk_list.lock); + + if 
(err < 0) + goto done; + } + + sk->sk_max_ack_backlog = backlog; + sk->sk_ack_backlog = 0; + sk->sk_state = BT_LISTEN; + +done: + release_sock(sk); + return err; +} + +static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int flags) +{ + DECLARE_WAITQUEUE(wait, current); + struct sock *sk = sock->sk, *nsk; + long timeo; + int err = 0; + + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + + if (sk->sk_state != BT_LISTEN) { + err = -EBADFD; + goto done; + } + + timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + + BT_DBG("sk %p timeo %ld", sk, timeo); + + /* Wait for an incoming connection. (wake-one). */ + add_wait_queue_exclusive(sk_sleep(sk), &wait); + while (!(nsk = bt_accept_dequeue(sk, newsock))) { + set_current_state(TASK_INTERRUPTIBLE); + if (!timeo) { + err = -EAGAIN; + break; + } + + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + + if (sk->sk_state != BT_LISTEN) { + err = -EBADFD; + break; + } + + if (signal_pending(current)) { + err = sock_intr_errno(timeo); + break; + } + } + set_current_state(TASK_RUNNING); + remove_wait_queue(sk_sleep(sk), &wait); + + if (err) + goto done; + + newsock->state = SS_CONNECTED; + + BT_DBG("new socket %p", nsk); + +done: + release_sock(sk); + return err; +} + +static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer) +{ + struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr; + struct sock *sk = sock->sk; + + BT_DBG("sock %p, sk %p", sock, sk); + + addr->sa_family = AF_BLUETOOTH; + *len = sizeof(struct sockaddr_l2); + + if (peer) { + la->l2_psm = l2cap_pi(sk)->psm; + bacpy(&la->l2_bdaddr, &bt_sk(sk)->dst); + la->l2_cid = cpu_to_le16(l2cap_pi(sk)->dcid); + } else { + la->l2_psm = l2cap_pi(sk)->sport; + bacpy(&la->l2_bdaddr, &bt_sk(sk)->src); + la->l2_cid = cpu_to_le16(l2cap_pi(sk)->scid); + } + + return 0; +} + +static int __l2cap_wait_ack(struct sock *sk) +{ + DECLARE_WAITQUEUE(wait, current); + int err = 0; + int timeo = 
HZ/5; + + add_wait_queue(sk_sleep(sk), &wait); + while ((l2cap_pi(sk)->unacked_frames > 0 && l2cap_pi(sk)->conn)) { + set_current_state(TASK_INTERRUPTIBLE); + + if (!timeo) + timeo = HZ/5; + + if (signal_pending(current)) { + err = sock_intr_errno(timeo); + break; + } + + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); + + err = sock_error(sk); + if (err) + break; + } + set_current_state(TASK_RUNNING); + remove_wait_queue(sk_sleep(sk), &wait); + return err; +} + +static void l2cap_monitor_timeout(unsigned long arg) +{ + struct sock *sk = (void *) arg; + + BT_DBG("sk %p", sk); + + bh_lock_sock(sk); + if (l2cap_pi(sk)->retry_count >= l2cap_pi(sk)->remote_max_tx) { + l2cap_send_disconn_req(l2cap_pi(sk)->conn, sk, ECONNABORTED); + bh_unlock_sock(sk); + return; + } + + l2cap_pi(sk)->retry_count++; + __mod_monitor_timer(); + + l2cap_send_rr_or_rnr(l2cap_pi(sk), L2CAP_CTRL_POLL); + bh_unlock_sock(sk); +} + +static void l2cap_retrans_timeout(unsigned long arg) +{ + struct sock *sk = (void *) arg; + + BT_DBG("sk %p", sk); + + bh_lock_sock(sk); + l2cap_pi(sk)->retry_count = 1; + __mod_monitor_timer(); + + l2cap_pi(sk)->conn_state |= L2CAP_CONN_WAIT_F; + + l2cap_send_rr_or_rnr(l2cap_pi(sk), L2CAP_CTRL_POLL); + bh_unlock_sock(sk); +} + +static void l2cap_drop_acked_frames(struct sock *sk) +{ + struct sk_buff *skb; + + while ((skb = skb_peek(TX_QUEUE(sk))) && + l2cap_pi(sk)->unacked_frames) { + if (bt_cb(skb)->tx_seq == l2cap_pi(sk)->expected_ack_seq) + break; + + skb = skb_dequeue(TX_QUEUE(sk)); + kfree_skb(skb); + + l2cap_pi(sk)->unacked_frames--; + } + + if (!l2cap_pi(sk)->unacked_frames) + del_timer(&l2cap_pi(sk)->retrans_timer); +} + +static inline void l2cap_do_send(struct sock *sk, struct sk_buff *skb) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + struct hci_conn *hcon = pi->conn->hcon; + u16 flags; + + BT_DBG("sk %p, skb %p len %d", sk, skb, skb->len); + + if (!pi->flushable && lmp_no_flush_capable(hcon->hdev)) + flags = ACL_START_NO_FLUSH; + else 
+ flags = ACL_START; + + hci_send_acl(hcon, skb, flags); +} + +static void l2cap_streaming_send(struct sock *sk) +{ + struct sk_buff *skb; + struct l2cap_pinfo *pi = l2cap_pi(sk); + u16 control, fcs; + + while ((skb = skb_dequeue(TX_QUEUE(sk)))) { + control = get_unaligned_le16(skb->data + L2CAP_HDR_SIZE); + control |= pi->next_tx_seq << L2CAP_CTRL_TXSEQ_SHIFT; + put_unaligned_le16(control, skb->data + L2CAP_HDR_SIZE); + + if (pi->fcs == L2CAP_FCS_CRC16) { + fcs = crc16(0, (u8 *)skb->data, skb->len - 2); + put_unaligned_le16(fcs, skb->data + skb->len - 2); + } + + l2cap_do_send(sk, skb); + + pi->next_tx_seq = (pi->next_tx_seq + 1) % 64; + } +} + +static void l2cap_retransmit_one_frame(struct sock *sk, u8 tx_seq) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + struct sk_buff *skb, *tx_skb; + u16 control, fcs; + + skb = skb_peek(TX_QUEUE(sk)); + if (!skb) + return; + + do { + if (bt_cb(skb)->tx_seq == tx_seq) + break; + + if (skb_queue_is_last(TX_QUEUE(sk), skb)) + return; + + } while ((skb = skb_queue_next(TX_QUEUE(sk), skb))); + + if (pi->remote_max_tx && + bt_cb(skb)->retries == pi->remote_max_tx) { + l2cap_send_disconn_req(pi->conn, sk, ECONNABORTED); + return; + } + + tx_skb = skb_clone(skb, GFP_ATOMIC); + bt_cb(skb)->retries++; + control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE); + + if (pi->conn_state & L2CAP_CONN_SEND_FBIT) { + control |= L2CAP_CTRL_FINAL; + pi->conn_state &= ~L2CAP_CONN_SEND_FBIT; + } + + control |= (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT) + | (tx_seq << L2CAP_CTRL_TXSEQ_SHIFT); + + put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE); + + if (pi->fcs == L2CAP_FCS_CRC16) { + fcs = crc16(0, (u8 *)tx_skb->data, tx_skb->len - 2); + put_unaligned_le16(fcs, tx_skb->data + tx_skb->len - 2); + } + + l2cap_do_send(sk, tx_skb); +} + +static int l2cap_ertm_send(struct sock *sk) +{ + struct sk_buff *skb, *tx_skb; + struct l2cap_pinfo *pi = l2cap_pi(sk); + u16 control, fcs; + int nsent = 0; + + if (sk->sk_state != BT_CONNECTED) + return 
-ENOTCONN; + + while ((skb = sk->sk_send_head) && (!l2cap_tx_window_full(sk))) { + + if (pi->remote_max_tx && + bt_cb(skb)->retries == pi->remote_max_tx) { + l2cap_send_disconn_req(pi->conn, sk, ECONNABORTED); + break; + } + + tx_skb = skb_clone(skb, GFP_ATOMIC); + + bt_cb(skb)->retries++; + + control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE); + control &= L2CAP_CTRL_SAR; + + if (pi->conn_state & L2CAP_CONN_SEND_FBIT) { + control |= L2CAP_CTRL_FINAL; + pi->conn_state &= ~L2CAP_CONN_SEND_FBIT; + } + control |= (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT) + | (pi->next_tx_seq << L2CAP_CTRL_TXSEQ_SHIFT); + put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE); + + + if (pi->fcs == L2CAP_FCS_CRC16) { + fcs = crc16(0, (u8 *)skb->data, tx_skb->len - 2); + put_unaligned_le16(fcs, skb->data + tx_skb->len - 2); + } + + l2cap_do_send(sk, tx_skb); + + __mod_retrans_timer(); + + bt_cb(skb)->tx_seq = pi->next_tx_seq; + pi->next_tx_seq = (pi->next_tx_seq + 1) % 64; + + pi->unacked_frames++; + pi->frames_sent++; + + if (skb_queue_is_last(TX_QUEUE(sk), skb)) + sk->sk_send_head = NULL; + else + sk->sk_send_head = skb_queue_next(TX_QUEUE(sk), skb); + + nsent++; + } + + return nsent; +} + +static int l2cap_retransmit_frames(struct sock *sk) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + int ret; + + if (!skb_queue_empty(TX_QUEUE(sk))) + sk->sk_send_head = TX_QUEUE(sk)->next; + + pi->next_tx_seq = pi->expected_ack_seq; + ret = l2cap_ertm_send(sk); + return ret; +} + +static void l2cap_send_ack(struct l2cap_pinfo *pi) +{ + struct sock *sk = (struct sock *)pi; + u16 control = 0; + + control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; + + if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) { + control |= L2CAP_SUPER_RCV_NOT_READY; + pi->conn_state |= L2CAP_CONN_RNR_SENT; + l2cap_send_sframe(pi, control); + return; + } + + if (l2cap_ertm_send(sk) > 0) + return; + + control |= L2CAP_SUPER_RCV_READY; + l2cap_send_sframe(pi, control); +} + +static void l2cap_send_srejtail(struct sock 
*sk) +{ + struct srej_list *tail; + u16 control; + + control = L2CAP_SUPER_SELECT_REJECT; + control |= L2CAP_CTRL_FINAL; + + tail = list_entry(SREJ_LIST(sk)->prev, struct srej_list, list); + control |= tail->tx_seq << L2CAP_CTRL_REQSEQ_SHIFT; + + l2cap_send_sframe(l2cap_pi(sk), control); +} + +static inline int l2cap_skbuff_fromiovec(struct sock *sk, struct msghdr *msg, int len, int count, struct sk_buff *skb) +{ + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + struct sk_buff **frag; + int err, sent = 0; + + if (memcpy_fromiovec(skb_put(skb, count), msg->msg_iov, count)) + return -EFAULT; + + sent += count; + len -= count; + + /* Continuation fragments (no L2CAP header) */ + frag = &skb_shinfo(skb)->frag_list; + while (len) { + count = min_t(unsigned int, conn->mtu, len); + + *frag = bt_skb_send_alloc(sk, count, msg->msg_flags & MSG_DONTWAIT, &err); + if (!*frag) + return err; + if (memcpy_fromiovec(skb_put(*frag, count), msg->msg_iov, count)) + return -EFAULT; + + sent += count; + len -= count; + + frag = &(*frag)->next; + } + + return sent; +} + +static struct sk_buff *l2cap_create_connless_pdu(struct sock *sk, struct msghdr *msg, size_t len) +{ + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + struct sk_buff *skb; + int err, count, hlen = L2CAP_HDR_SIZE + 2; + struct l2cap_hdr *lh; + + BT_DBG("sk %p len %d", sk, (int)len); + + count = min_t(unsigned int, (conn->mtu - hlen), len); + skb = bt_skb_send_alloc(sk, count + hlen, + msg->msg_flags & MSG_DONTWAIT, &err); + if (!skb) + return ERR_PTR(err); + + /* Create L2CAP header */ + lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); + lh->cid = cpu_to_le16(l2cap_pi(sk)->dcid); + lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE)); + put_unaligned_le16(l2cap_pi(sk)->psm, skb_put(skb, 2)); + + err = l2cap_skbuff_fromiovec(sk, msg, len, count, skb); + if (unlikely(err < 0)) { + kfree_skb(skb); + return ERR_PTR(err); + } + return skb; +} + +static struct sk_buff *l2cap_create_basic_pdu(struct sock *sk, struct 
msghdr *msg, size_t len) +{ + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + struct sk_buff *skb; + int err, count, hlen = L2CAP_HDR_SIZE; + struct l2cap_hdr *lh; + + BT_DBG("sk %p len %d", sk, (int)len); + + count = min_t(unsigned int, (conn->mtu - hlen), len); + skb = bt_skb_send_alloc(sk, count + hlen, + msg->msg_flags & MSG_DONTWAIT, &err); + if (!skb) + return ERR_PTR(err); + + /* Create L2CAP header */ + lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); + lh->cid = cpu_to_le16(l2cap_pi(sk)->dcid); + lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE)); + + err = l2cap_skbuff_fromiovec(sk, msg, len, count, skb); + if (unlikely(err < 0)) { + kfree_skb(skb); + return ERR_PTR(err); + } + return skb; +} + +static struct sk_buff *l2cap_create_iframe_pdu(struct sock *sk, struct msghdr *msg, size_t len, u16 control, u16 sdulen) +{ + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + struct sk_buff *skb; + int err, count, hlen = L2CAP_HDR_SIZE + 2; + struct l2cap_hdr *lh; + + BT_DBG("sk %p len %d", sk, (int)len); + + if (!conn) + return ERR_PTR(-ENOTCONN); + + if (sdulen) + hlen += 2; + + if (l2cap_pi(sk)->fcs == L2CAP_FCS_CRC16) + hlen += 2; + + count = min_t(unsigned int, (conn->mtu - hlen), len); + skb = bt_skb_send_alloc(sk, count + hlen, + msg->msg_flags & MSG_DONTWAIT, &err); + if (!skb) + return ERR_PTR(err); + + /* Create L2CAP header */ + lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); + lh->cid = cpu_to_le16(l2cap_pi(sk)->dcid); + lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE)); + put_unaligned_le16(control, skb_put(skb, 2)); + if (sdulen) + put_unaligned_le16(sdulen, skb_put(skb, 2)); + + err = l2cap_skbuff_fromiovec(sk, msg, len, count, skb); + if (unlikely(err < 0)) { + kfree_skb(skb); + return ERR_PTR(err); + } + + if (l2cap_pi(sk)->fcs == L2CAP_FCS_CRC16) + put_unaligned_le16(0, skb_put(skb, 2)); + + bt_cb(skb)->retries = 0; + return skb; +} + +static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, size_t len) +{ + 
struct l2cap_pinfo *pi = l2cap_pi(sk); + struct sk_buff *skb; + struct sk_buff_head sar_queue; + u16 control; + size_t size = 0; + + skb_queue_head_init(&sar_queue); + control = L2CAP_SDU_START; + skb = l2cap_create_iframe_pdu(sk, msg, pi->remote_mps, control, len); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + __skb_queue_tail(&sar_queue, skb); + len -= pi->remote_mps; + size += pi->remote_mps; + + while (len > 0) { + size_t buflen; + + if (len > pi->remote_mps) { + control = L2CAP_SDU_CONTINUE; + buflen = pi->remote_mps; + } else { + control = L2CAP_SDU_END; + buflen = len; + } + + skb = l2cap_create_iframe_pdu(sk, msg, buflen, control, 0); + if (IS_ERR(skb)) { + skb_queue_purge(&sar_queue); + return PTR_ERR(skb); + } + + __skb_queue_tail(&sar_queue, skb); + len -= buflen; + size += buflen; + } + skb_queue_splice_tail(&sar_queue, TX_QUEUE(sk)); + if (sk->sk_send_head == NULL) + sk->sk_send_head = sar_queue.next; + + return size; +} + +static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) +{ + struct sock *sk = sock->sk; + struct l2cap_pinfo *pi = l2cap_pi(sk); + struct sk_buff *skb; + u16 control; + int err; + + BT_DBG("sock %p, sk %p", sock, sk); + + err = sock_error(sk); + if (err) + return err; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + lock_sock(sk); + + if (sk->sk_state != BT_CONNECTED) { + err = -ENOTCONN; + goto done; + } + + /* Connectionless channel */ + if (sk->sk_type == SOCK_DGRAM) { + skb = l2cap_create_connless_pdu(sk, msg, len); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + } else { + l2cap_do_send(sk, skb); + err = len; + } + goto done; + } + + switch (pi->mode) { + case L2CAP_MODE_BASIC: + /* Check outgoing MTU */ + if (len > pi->omtu) { + err = -EMSGSIZE; + goto done; + } + + /* Create a basic PDU */ + skb = l2cap_create_basic_pdu(sk, msg, len); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + goto done; + } + + l2cap_do_send(sk, skb); + err = len; + break; + + case L2CAP_MODE_ERTM: + 
case L2CAP_MODE_STREAMING: + /* Entire SDU fits into one PDU */ + if (len <= pi->remote_mps) { + control = L2CAP_SDU_UNSEGMENTED; + skb = l2cap_create_iframe_pdu(sk, msg, len, control, 0); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + goto done; + } + __skb_queue_tail(TX_QUEUE(sk), skb); + + if (sk->sk_send_head == NULL) + sk->sk_send_head = skb; + + } else { + /* Segment SDU into multiples PDUs */ + err = l2cap_sar_segment_sdu(sk, msg, len); + if (err < 0) + goto done; + } + + if (pi->mode == L2CAP_MODE_STREAMING) { + l2cap_streaming_send(sk); + } else { + if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) && + (pi->conn_state & L2CAP_CONN_WAIT_F)) { + err = len; + break; + } + err = l2cap_ertm_send(sk); + } + + if (err >= 0) + err = len; + break; + + default: + BT_DBG("bad state %1.1x", pi->mode); + err = -EBADFD; + } + +done: + release_sock(sk); + return err; +} + +static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags) +{ + struct sock *sk = sock->sk; + + lock_sock(sk); + + if (sk->sk_state == BT_CONNECT2 && bt_sk(sk)->defer_setup) { + struct l2cap_conn_rsp rsp; + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + u8 buf[128]; + + sk->sk_state = BT_CONFIG; + + rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); + rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); + rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS); + rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); + l2cap_send_cmd(l2cap_pi(sk)->conn, l2cap_pi(sk)->ident, + L2CAP_CONN_RSP, sizeof(rsp), &rsp); + + if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) { + release_sock(sk); + return 0; + } + + l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; + l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, + l2cap_build_conf_req(sk, buf), buf); + l2cap_pi(sk)->num_conf_req++; + + release_sock(sk); + return 0; + } + + release_sock(sk); + + if (sock->type == SOCK_STREAM) + return bt_sock_stream_recvmsg(iocb, sock, msg, len, flags); + + return bt_sock_recvmsg(iocb, sock, msg, len, 
flags); +} + +static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, unsigned int optlen) +{ + struct sock *sk = sock->sk; + struct l2cap_options opts; + int len, err = 0; + u32 opt; + + BT_DBG("sk %p", sk); + + lock_sock(sk); + + switch (optname) { + case L2CAP_OPTIONS: + if (sk->sk_state == BT_CONNECTED) { + err = -EINVAL; + break; + } + + opts.imtu = l2cap_pi(sk)->imtu; + opts.omtu = l2cap_pi(sk)->omtu; + opts.flush_to = l2cap_pi(sk)->flush_to; + opts.mode = l2cap_pi(sk)->mode; + opts.fcs = l2cap_pi(sk)->fcs; + opts.max_tx = l2cap_pi(sk)->max_tx; + opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win; + + len = min_t(unsigned int, sizeof(opts), optlen); + if (copy_from_user((char *) &opts, optval, len)) { + err = -EFAULT; + break; + } + + if (opts.txwin_size > L2CAP_DEFAULT_TX_WINDOW) { + err = -EINVAL; + break; + } + + l2cap_pi(sk)->mode = opts.mode; + switch (l2cap_pi(sk)->mode) { + case L2CAP_MODE_BASIC: + l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_STATE2_DEVICE; + break; + case L2CAP_MODE_ERTM: + case L2CAP_MODE_STREAMING: + if (!disable_ertm) + break; + /* fall through */ + default: + err = -EINVAL; + break; + } + + l2cap_pi(sk)->imtu = opts.imtu; + l2cap_pi(sk)->omtu = opts.omtu; + l2cap_pi(sk)->fcs = opts.fcs; + l2cap_pi(sk)->max_tx = opts.max_tx; + l2cap_pi(sk)->tx_win = (__u8)opts.txwin_size; + break; + + case L2CAP_LM: + if (get_user(opt, (u32 __user *) optval)) { + err = -EFAULT; + break; + } + + if (opt & L2CAP_LM_AUTH) + l2cap_pi(sk)->sec_level = BT_SECURITY_LOW; + if (opt & L2CAP_LM_ENCRYPT) + l2cap_pi(sk)->sec_level = BT_SECURITY_MEDIUM; + if (opt & L2CAP_LM_SECURE) + l2cap_pi(sk)->sec_level = BT_SECURITY_HIGH; + + l2cap_pi(sk)->role_switch = (opt & L2CAP_LM_MASTER); + l2cap_pi(sk)->force_reliable = (opt & L2CAP_LM_RELIABLE); + break; + + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; +} + +static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, 
unsigned int optlen) +{ + struct sock *sk = sock->sk; + struct bt_security sec; + int len, err = 0; + u32 opt; + + BT_DBG("sk %p", sk); + + if (level == SOL_L2CAP) + return l2cap_sock_setsockopt_old(sock, optname, optval, optlen); + + if (level != SOL_BLUETOOTH) + return -ENOPROTOOPT; + + lock_sock(sk); + + switch (optname) { + case BT_SECURITY: + if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM + && sk->sk_type != SOCK_RAW) { + err = -EINVAL; + break; + } + + sec.level = BT_SECURITY_LOW; + + len = min_t(unsigned int, sizeof(sec), optlen); + if (copy_from_user((char *) &sec, optval, len)) { + err = -EFAULT; + break; + } + + if (sec.level < BT_SECURITY_LOW || + sec.level > BT_SECURITY_HIGH) { + err = -EINVAL; + break; + } + + l2cap_pi(sk)->sec_level = sec.level; + break; + + case BT_DEFER_SETUP: + if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { + err = -EINVAL; + break; + } + + if (get_user(opt, (u32 __user *) optval)) { + err = -EFAULT; + break; + } + + bt_sk(sk)->defer_setup = opt; + break; + + case BT_FLUSHABLE: + if (get_user(opt, (u32 __user *) optval)) { + err = -EFAULT; + break; + } + + if (opt > BT_FLUSHABLE_ON) { + err = -EINVAL; + break; + } + + if (opt == BT_FLUSHABLE_OFF) { + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + /* proceed futher only when we have l2cap_conn and + No Flush support in the LM */ + if (!conn || !lmp_no_flush_capable(conn->hcon->hdev)) { + err = -EINVAL; + break; + } + } + + l2cap_pi(sk)->flushable = opt; + break; + + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; +} + +static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + struct l2cap_options opts; + struct l2cap_conninfo cinfo; + int len, err = 0; + u32 opt; + + BT_DBG("sk %p", sk); + + if (get_user(len, optlen)) + return -EFAULT; + + lock_sock(sk); + + switch (optname) { + case L2CAP_OPTIONS: + opts.imtu = l2cap_pi(sk)->imtu; 
+ opts.omtu = l2cap_pi(sk)->omtu; + opts.flush_to = l2cap_pi(sk)->flush_to; + opts.mode = l2cap_pi(sk)->mode; + opts.fcs = l2cap_pi(sk)->fcs; + opts.max_tx = l2cap_pi(sk)->max_tx; + opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win; + + len = min_t(unsigned int, len, sizeof(opts)); + if (copy_to_user(optval, (char *) &opts, len)) + err = -EFAULT; + + break; + + case L2CAP_LM: + switch (l2cap_pi(sk)->sec_level) { + case BT_SECURITY_LOW: + opt = L2CAP_LM_AUTH; + break; + case BT_SECURITY_MEDIUM: + opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT; + break; + case BT_SECURITY_HIGH: + opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT | + L2CAP_LM_SECURE; + break; + default: + opt = 0; + break; + } + + if (l2cap_pi(sk)->role_switch) + opt |= L2CAP_LM_MASTER; + + if (l2cap_pi(sk)->force_reliable) + opt |= L2CAP_LM_RELIABLE; + + if (put_user(opt, (u32 __user *) optval)) + err = -EFAULT; + break; + + case L2CAP_CONNINFO: + if (sk->sk_state != BT_CONNECTED && + !(sk->sk_state == BT_CONNECT2 && + bt_sk(sk)->defer_setup)) { + err = -ENOTCONN; + break; + } + + cinfo.hci_handle = l2cap_pi(sk)->conn->hcon->handle; + memcpy(cinfo.dev_class, l2cap_pi(sk)->conn->hcon->dev_class, 3); + + len = min_t(unsigned int, len, sizeof(cinfo)); + if (copy_to_user(optval, (char *) &cinfo, len)) + err = -EFAULT; + + break; + + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; +} + +static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + struct bt_security sec; + int len, err = 0; + + BT_DBG("sk %p", sk); + + if (level == SOL_L2CAP) + return l2cap_sock_getsockopt_old(sock, optname, optval, optlen); + + if (level != SOL_BLUETOOTH) + return -ENOPROTOOPT; + + if (get_user(len, optlen)) + return -EFAULT; + + lock_sock(sk); + + switch (optname) { + case BT_SECURITY: + if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM + && sk->sk_type != SOCK_RAW) { + err = -EINVAL; + break; + } + + 
sec.level = l2cap_pi(sk)->sec_level; + + len = min_t(unsigned int, len, sizeof(sec)); + if (copy_to_user(optval, (char *) &sec, len)) + err = -EFAULT; + + break; + + case BT_DEFER_SETUP: + if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { + err = -EINVAL; + break; + } + + if (put_user(bt_sk(sk)->defer_setup, (u32 __user *) optval)) + err = -EFAULT; + + break; + + case BT_FLUSHABLE: + if (put_user(l2cap_pi(sk)->flushable, (u32 __user *) optval)) + err = -EFAULT; + + break; + + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; +} + +static int l2cap_sock_shutdown(struct socket *sock, int how) +{ + struct sock *sk = sock->sk; + int err = 0; + + BT_DBG("sock %p, sk %p", sock, sk); + + if (!sk) + return 0; + + lock_sock(sk); + if (!sk->sk_shutdown) { + if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) + err = __l2cap_wait_ack(sk); + + sk->sk_shutdown = SHUTDOWN_MASK; + l2cap_sock_clear_timer(sk); + __l2cap_sock_close(sk, 0); + + if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) + err = bt_sock_wait_state(sk, BT_CLOSED, + sk->sk_lingertime); + } + + if (!err && sk->sk_err) + err = -sk->sk_err; + + release_sock(sk); + return err; +} + +static int l2cap_sock_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + int err; + + BT_DBG("sock %p, sk %p", sock, sk); + + if (!sk) + return 0; + + err = l2cap_sock_shutdown(sock, 2); + + sock_orphan(sk); + l2cap_sock_kill(sk); + return err; +} + +static void l2cap_chan_ready(struct sock *sk) +{ + struct sock *parent = bt_sk(sk)->parent; + + BT_DBG("sk %p, parent %p", sk, parent); + + l2cap_pi(sk)->conf_state = 0; + l2cap_sock_clear_timer(sk); + + if (!parent) { + /* Outgoing channel. + * Wake up socket sleeping on connect. + */ + sk->sk_state = BT_CONNECTED; + sk->sk_state_change(sk); + } else { + /* Incoming channel. + * Wake up socket sleeping on accept. 
+ */ + parent->sk_data_ready(parent, 0); + } +} + +/* Copy frame to all raw sockets on that connection */ +static void l2cap_raw_recv(struct l2cap_conn *conn, struct sk_buff *skb) +{ + struct l2cap_chan_list *l = &conn->chan_list; + struct sk_buff *nskb; + struct sock *sk; + + BT_DBG("conn %p", conn); + + read_lock(&l->lock); + for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { + if (sk->sk_type != SOCK_RAW) + continue; + + /* Don't send frame to the socket it came from */ + if (skb->sk == sk) + continue; + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + continue; + + if (sock_queue_rcv_skb(sk, nskb)) + kfree_skb(nskb); + } + read_unlock(&l->lock); +} + +/* ---- L2CAP signalling commands ---- */ +static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, + u8 code, u8 ident, u16 dlen, void *data) +{ + struct sk_buff *skb, **frag; + struct l2cap_cmd_hdr *cmd; + struct l2cap_hdr *lh; + int len, count; + + BT_DBG("conn %p, code 0x%2.2x, ident 0x%2.2x, len %d", + conn, code, ident, dlen); + + len = L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE + dlen; + count = min_t(unsigned int, conn->mtu, len); + + skb = bt_skb_alloc(count, GFP_ATOMIC); + if (!skb) + return NULL; + + lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); + lh->len = cpu_to_le16(L2CAP_CMD_HDR_SIZE + dlen); + lh->cid = cpu_to_le16(L2CAP_CID_SIGNALING); + + cmd = (struct l2cap_cmd_hdr *) skb_put(skb, L2CAP_CMD_HDR_SIZE); + cmd->code = code; + cmd->ident = ident; + cmd->len = cpu_to_le16(dlen); + + if (dlen) { + count -= L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE; + memcpy(skb_put(skb, count), data, count); + data += count; + } + + len -= skb->len; + + /* Continuation fragments (no L2CAP header) */ + frag = &skb_shinfo(skb)->frag_list; + while (len) { + count = min_t(unsigned int, conn->mtu, len); + + *frag = bt_skb_alloc(count, GFP_ATOMIC); + if (!*frag) + goto fail; + + memcpy(skb_put(*frag, count), data, count); + + len -= count; + data += count; + + frag = &(*frag)->next; + } + + return skb; + +fail: + 
kfree_skb(skb); + return NULL; +} + +static inline int l2cap_get_conf_opt(void **ptr, int *type, int *olen, unsigned long *val) +{ + struct l2cap_conf_opt *opt = *ptr; + int len; + + len = L2CAP_CONF_OPT_SIZE + opt->len; + *ptr += len; + + *type = opt->type; + *olen = opt->len; + + switch (opt->len) { + case 1: + *val = *((u8 *) opt->val); + break; + + case 2: + *val = get_unaligned_le16(opt->val); + break; + + case 4: + *val = get_unaligned_le32(opt->val); + break; + + default: + *val = (unsigned long) opt->val; + break; + } + + BT_DBG("type 0x%2.2x len %d val 0x%lx", *type, opt->len, *val); + return len; +} + +static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val) +{ + struct l2cap_conf_opt *opt = *ptr; + + BT_DBG("type 0x%2.2x len %d val 0x%lx", type, len, val); + + opt->type = type; + opt->len = len; + + switch (len) { + case 1: + *((u8 *) opt->val) = val; + break; + + case 2: + put_unaligned_le16(val, opt->val); + break; + + case 4: + put_unaligned_le32(val, opt->val); + break; + + default: + memcpy(opt->val, (void *) val, len); + break; + } + + *ptr += L2CAP_CONF_OPT_SIZE + len; +} + +static void l2cap_ack_timeout(unsigned long arg) +{ + struct sock *sk = (void *) arg; + + bh_lock_sock(sk); + l2cap_send_ack(l2cap_pi(sk)); + bh_unlock_sock(sk); +} + +static inline void l2cap_ertm_init(struct sock *sk) +{ + l2cap_pi(sk)->expected_ack_seq = 0; + l2cap_pi(sk)->unacked_frames = 0; + l2cap_pi(sk)->buffer_seq = 0; + l2cap_pi(sk)->num_acked = 0; + l2cap_pi(sk)->frames_sent = 0; + + setup_timer(&l2cap_pi(sk)->retrans_timer, + l2cap_retrans_timeout, (unsigned long) sk); + setup_timer(&l2cap_pi(sk)->monitor_timer, + l2cap_monitor_timeout, (unsigned long) sk); + setup_timer(&l2cap_pi(sk)->ack_timer, + l2cap_ack_timeout, (unsigned long) sk); + + __skb_queue_head_init(SREJ_QUEUE(sk)); + __skb_queue_head_init(BUSY_QUEUE(sk)); + + INIT_WORK(&l2cap_pi(sk)->busy_work, l2cap_busy_work); + + sk->sk_backlog_rcv = l2cap_ertm_data_rcv; +} + +static inline 
__u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask) +{ + switch (mode) { + case L2CAP_MODE_STREAMING: + case L2CAP_MODE_ERTM: + if (l2cap_mode_supported(mode, remote_feat_mask)) + return mode; + /* fall through */ + default: + return L2CAP_MODE_BASIC; + } +} + +static int l2cap_build_conf_req(struct sock *sk, void *data) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + struct l2cap_conf_req *req = data; + struct l2cap_conf_rfc rfc = { .mode = pi->mode }; + void *ptr = req->data; + + BT_DBG("sk %p", sk); + + if (pi->num_conf_req || pi->num_conf_rsp) + goto done; + + switch (pi->mode) { + case L2CAP_MODE_STREAMING: + case L2CAP_MODE_ERTM: + if (pi->conf_state & L2CAP_CONF_STATE2_DEVICE) + break; + + /* fall through */ + default: + pi->mode = l2cap_select_mode(rfc.mode, pi->conn->feat_mask); + break; + } + +done: + if (pi->imtu != L2CAP_DEFAULT_MTU) + l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->imtu); + + switch (pi->mode) { + case L2CAP_MODE_BASIC: + if (!(pi->conn->feat_mask & L2CAP_FEAT_ERTM) && + !(pi->conn->feat_mask & L2CAP_FEAT_STREAMING)) + break; + + rfc.mode = L2CAP_MODE_BASIC; + rfc.txwin_size = 0; + rfc.max_transmit = 0; + rfc.retrans_timeout = 0; + rfc.monitor_timeout = 0; + rfc.max_pdu_size = 0; + + l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), + (unsigned long) &rfc); + break; + + case L2CAP_MODE_ERTM: + rfc.mode = L2CAP_MODE_ERTM; + rfc.txwin_size = pi->tx_win; + rfc.max_transmit = pi->max_tx; + rfc.retrans_timeout = 0; + rfc.monitor_timeout = 0; + rfc.max_pdu_size = cpu_to_le16(L2CAP_DEFAULT_MAX_PDU_SIZE); + if (L2CAP_DEFAULT_MAX_PDU_SIZE > pi->conn->mtu - 10) + rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10); + + l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), + (unsigned long) &rfc); + + if (!(pi->conn->feat_mask & L2CAP_FEAT_FCS)) + break; + + if (pi->fcs == L2CAP_FCS_NONE || + pi->conf_state & L2CAP_CONF_NO_FCS_RECV) { + pi->fcs = L2CAP_FCS_NONE; + l2cap_add_conf_opt(&ptr, L2CAP_CONF_FCS, 1, pi->fcs); + } + break; + + case 
L2CAP_MODE_STREAMING: + rfc.mode = L2CAP_MODE_STREAMING; + rfc.txwin_size = 0; + rfc.max_transmit = 0; + rfc.retrans_timeout = 0; + rfc.monitor_timeout = 0; + rfc.max_pdu_size = cpu_to_le16(L2CAP_DEFAULT_MAX_PDU_SIZE); + if (L2CAP_DEFAULT_MAX_PDU_SIZE > pi->conn->mtu - 10) + rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10); + + l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), + (unsigned long) &rfc); + + if (!(pi->conn->feat_mask & L2CAP_FEAT_FCS)) + break; + + if (pi->fcs == L2CAP_FCS_NONE || + pi->conf_state & L2CAP_CONF_NO_FCS_RECV) { + pi->fcs = L2CAP_FCS_NONE; + l2cap_add_conf_opt(&ptr, L2CAP_CONF_FCS, 1, pi->fcs); + } + break; + } + + /* FIXME: Need actual value of the flush timeout */ + //if (flush_to != L2CAP_DEFAULT_FLUSH_TO) + // l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2, pi->flush_to); + + req->dcid = cpu_to_le16(pi->dcid); + req->flags = cpu_to_le16(0); + + return ptr - data; +} + +static int l2cap_parse_conf_req(struct sock *sk, void *data) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + struct l2cap_conf_rsp *rsp = data; + void *ptr = rsp->data; + void *req = pi->conf_req; + int len = pi->conf_len; + int type, hint, olen; + unsigned long val; + struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC }; + u16 mtu = L2CAP_DEFAULT_MTU; + u16 result = L2CAP_CONF_SUCCESS; + + BT_DBG("sk %p", sk); + + while (len >= L2CAP_CONF_OPT_SIZE) { + len -= l2cap_get_conf_opt(&req, &type, &olen, &val); + + hint = type & L2CAP_CONF_HINT; + type &= L2CAP_CONF_MASK; + + switch (type) { + case L2CAP_CONF_MTU: + mtu = val; + break; + + case L2CAP_CONF_FLUSH_TO: + pi->flush_to = val; + break; + + case L2CAP_CONF_QOS: + break; + + case L2CAP_CONF_RFC: + if (olen == sizeof(rfc)) + memcpy(&rfc, (void *) val, olen); + break; + + case L2CAP_CONF_FCS: + if (val == L2CAP_FCS_NONE) + pi->conf_state |= L2CAP_CONF_NO_FCS_RECV; + + break; + + default: + if (hint) + break; + + result = L2CAP_CONF_UNKNOWN; + *((u8 *) ptr++) = type; + break; + } + } + + if (pi->num_conf_rsp || 
pi->num_conf_req > 1) + goto done; + + switch (pi->mode) { + case L2CAP_MODE_STREAMING: + case L2CAP_MODE_ERTM: + if (!(pi->conf_state & L2CAP_CONF_STATE2_DEVICE)) { + pi->mode = l2cap_select_mode(rfc.mode, + pi->conn->feat_mask); + break; + } + + if (pi->mode != rfc.mode) + return -ECONNREFUSED; + + break; + } + +done: + if (pi->mode != rfc.mode) { + result = L2CAP_CONF_UNACCEPT; + rfc.mode = pi->mode; + + if (pi->num_conf_rsp == 1) + return -ECONNREFUSED; + + l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, + sizeof(rfc), (unsigned long) &rfc); + } + + + if (result == L2CAP_CONF_SUCCESS) { + /* Configure output options and let the other side know + * which ones we don't like. */ + + if (mtu < L2CAP_DEFAULT_MIN_MTU) + result = L2CAP_CONF_UNACCEPT; + else { + pi->omtu = mtu; + pi->conf_state |= L2CAP_CONF_MTU_DONE; + } + l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->omtu); + + switch (rfc.mode) { + case L2CAP_MODE_BASIC: + pi->fcs = L2CAP_FCS_NONE; + pi->conf_state |= L2CAP_CONF_MODE_DONE; + break; + + case L2CAP_MODE_ERTM: + pi->remote_tx_win = rfc.txwin_size; + pi->remote_max_tx = rfc.max_transmit; + + if (le16_to_cpu(rfc.max_pdu_size) > pi->conn->mtu - 10) + rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10); + + pi->remote_mps = le16_to_cpu(rfc.max_pdu_size); + + rfc.retrans_timeout = + le16_to_cpu(L2CAP_DEFAULT_RETRANS_TO); + rfc.monitor_timeout = + le16_to_cpu(L2CAP_DEFAULT_MONITOR_TO); + + pi->conf_state |= L2CAP_CONF_MODE_DONE; + + l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, + sizeof(rfc), (unsigned long) &rfc); + + break; + + case L2CAP_MODE_STREAMING: + if (le16_to_cpu(rfc.max_pdu_size) > pi->conn->mtu - 10) + rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10); + + pi->remote_mps = le16_to_cpu(rfc.max_pdu_size); + + pi->conf_state |= L2CAP_CONF_MODE_DONE; + + l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, + sizeof(rfc), (unsigned long) &rfc); + + break; + + default: + result = L2CAP_CONF_UNACCEPT; + + memset(&rfc, 0, sizeof(rfc)); + rfc.mode = pi->mode; + } + + if 
(result == L2CAP_CONF_SUCCESS) + pi->conf_state |= L2CAP_CONF_OUTPUT_DONE; + } + rsp->scid = cpu_to_le16(pi->dcid); + rsp->result = cpu_to_le16(result); + rsp->flags = cpu_to_le16(0x0000); + + return ptr - data; +} + +static int l2cap_parse_conf_rsp(struct sock *sk, void *rsp, int len, void *data, u16 *result) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + struct l2cap_conf_req *req = data; + void *ptr = req->data; + int type, olen; + unsigned long val; + struct l2cap_conf_rfc rfc; + + BT_DBG("sk %p, rsp %p, len %d, req %p", sk, rsp, len, data); + + while (len >= L2CAP_CONF_OPT_SIZE) { + len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val); + + switch (type) { + case L2CAP_CONF_MTU: + if (val < L2CAP_DEFAULT_MIN_MTU) { + *result = L2CAP_CONF_UNACCEPT; + pi->imtu = L2CAP_DEFAULT_MIN_MTU; + } else + pi->imtu = val; + l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->imtu); + break; + + case L2CAP_CONF_FLUSH_TO: + pi->flush_to = val; + l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, + 2, pi->flush_to); + break; + + case L2CAP_CONF_RFC: + if (olen == sizeof(rfc)) + memcpy(&rfc, (void *)val, olen); + + if ((pi->conf_state & L2CAP_CONF_STATE2_DEVICE) && + rfc.mode != pi->mode) + return -ECONNREFUSED; + + pi->fcs = 0; + + l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, + sizeof(rfc), (unsigned long) &rfc); + break; + } + } + + if (pi->mode == L2CAP_MODE_BASIC && pi->mode != rfc.mode) + return -ECONNREFUSED; + + pi->mode = rfc.mode; + + if (*result == L2CAP_CONF_SUCCESS) { + switch (rfc.mode) { + case L2CAP_MODE_ERTM: + pi->retrans_timeout = le16_to_cpu(rfc.retrans_timeout); + pi->monitor_timeout = le16_to_cpu(rfc.monitor_timeout); + pi->mps = le16_to_cpu(rfc.max_pdu_size); + break; + case L2CAP_MODE_STREAMING: + pi->mps = le16_to_cpu(rfc.max_pdu_size); + } + } + + req->dcid = cpu_to_le16(pi->dcid); + req->flags = cpu_to_le16(0x0000); + + return ptr - data; +} + +static int l2cap_build_conf_rsp(struct sock *sk, void *data, u16 result, u16 flags) +{ + struct l2cap_conf_rsp *rsp = data; 
+ void *ptr = rsp->data; + + BT_DBG("sk %p", sk); + + rsp->scid = cpu_to_le16(l2cap_pi(sk)->dcid); + rsp->result = cpu_to_le16(result); + rsp->flags = cpu_to_le16(flags); + + return ptr - data; +} + +static void l2cap_conf_rfc_get(struct sock *sk, void *rsp, int len) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + int type, olen; + unsigned long val; + struct l2cap_conf_rfc rfc; + + BT_DBG("sk %p, rsp %p, len %d", sk, rsp, len); + + if ((pi->mode != L2CAP_MODE_ERTM) && (pi->mode != L2CAP_MODE_STREAMING)) + return; + + while (len >= L2CAP_CONF_OPT_SIZE) { + len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val); + + switch (type) { + case L2CAP_CONF_RFC: + if (olen == sizeof(rfc)) + memcpy(&rfc, (void *)val, olen); + goto done; + } + } + +done: + switch (rfc.mode) { + case L2CAP_MODE_ERTM: + pi->retrans_timeout = le16_to_cpu(rfc.retrans_timeout); + pi->monitor_timeout = le16_to_cpu(rfc.monitor_timeout); + pi->mps = le16_to_cpu(rfc.max_pdu_size); + break; + case L2CAP_MODE_STREAMING: + pi->mps = le16_to_cpu(rfc.max_pdu_size); + } +} + +static inline int l2cap_command_rej(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data) +{ + struct l2cap_cmd_rej *rej = (struct l2cap_cmd_rej *) data; + + if (rej->reason != 0x0000) + return 0; + + if ((conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) && + cmd->ident == conn->info_ident) { + del_timer(&conn->info_timer); + + conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; + conn->info_ident = 0; + + l2cap_conn_start(conn); + } + + return 0; +} + +static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data) +{ + struct l2cap_chan_list *list = &conn->chan_list; + struct l2cap_conn_req *req = (struct l2cap_conn_req *) data; + struct l2cap_conn_rsp rsp; + struct sock *parent, *sk = NULL; + int result, status = L2CAP_CS_NO_INFO; + + u16 dcid = 0, scid = __le16_to_cpu(req->scid); + __le16 psm = req->psm; + + BT_DBG("psm 0x%2.2x scid 0x%4.4x", psm, scid); + + /* Check if we have socket listening 
on psm */ + parent = l2cap_get_sock_by_psm(BT_LISTEN, psm, conn->src); + if (!parent) { + result = L2CAP_CR_BAD_PSM; + goto sendresp; + } + + bh_lock_sock(parent); + + /* Check if the ACL is secure enough (if not SDP) */ + if (psm != cpu_to_le16(0x0001) && + !hci_conn_check_link_mode(conn->hcon)) { + conn->disc_reason = 0x05; + result = L2CAP_CR_SEC_BLOCK; + goto response; + } + + result = L2CAP_CR_NO_MEM; + + /* Check for backlog size */ + if (sk_acceptq_is_full(parent)) { + BT_DBG("backlog full %d", parent->sk_ack_backlog); + goto response; + } + + sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP, GFP_ATOMIC); + if (!sk) + goto response; + + write_lock_bh(&list->lock); + + /* Check if we already have channel with that dcid */ + if (__l2cap_get_chan_by_dcid(list, scid)) { + write_unlock_bh(&list->lock); + sock_set_flag(sk, SOCK_ZAPPED); + l2cap_sock_kill(sk); + goto response; + } + + hci_conn_hold(conn->hcon); + + l2cap_sock_init(sk, parent); + bacpy(&bt_sk(sk)->src, conn->src); + bacpy(&bt_sk(sk)->dst, conn->dst); + l2cap_pi(sk)->psm = psm; + l2cap_pi(sk)->dcid = scid; + + __l2cap_chan_add(conn, sk, parent); + dcid = l2cap_pi(sk)->scid; + + l2cap_sock_set_timer(sk, sk->sk_sndtimeo); + + l2cap_pi(sk)->ident = cmd->ident; + + if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE) { + if (l2cap_check_security(sk)) { + if (bt_sk(sk)->defer_setup) { + sk->sk_state = BT_CONNECT2; + result = L2CAP_CR_PEND; + status = L2CAP_CS_AUTHOR_PEND; + parent->sk_data_ready(parent, 0); + } else { + sk->sk_state = BT_CONFIG; + result = L2CAP_CR_SUCCESS; + status = L2CAP_CS_NO_INFO; + } + } else { + sk->sk_state = BT_CONNECT2; + result = L2CAP_CR_PEND; + status = L2CAP_CS_AUTHEN_PEND; + } + } else { + sk->sk_state = BT_CONNECT2; + result = L2CAP_CR_PEND; + status = L2CAP_CS_NO_INFO; + } + + write_unlock_bh(&list->lock); + +response: + bh_unlock_sock(parent); + +sendresp: + rsp.scid = cpu_to_le16(scid); + rsp.dcid = cpu_to_le16(dcid); + rsp.result = cpu_to_le16(result); + 
rsp.status = cpu_to_le16(status); + l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp); + + if (result == L2CAP_CR_PEND && status == L2CAP_CS_NO_INFO) { + struct l2cap_info_req info; + info.type = cpu_to_le16(L2CAP_IT_FEAT_MASK); + + conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_SENT; + conn->info_ident = l2cap_get_ident(conn); + + mod_timer(&conn->info_timer, jiffies + + msecs_to_jiffies(L2CAP_INFO_TIMEOUT)); + + l2cap_send_cmd(conn, conn->info_ident, + L2CAP_INFO_REQ, sizeof(info), &info); + } + + if (sk && !(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) && + result == L2CAP_CR_SUCCESS) { + u8 buf[128]; + l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; + l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, + l2cap_build_conf_req(sk, buf), buf); + l2cap_pi(sk)->num_conf_req++; + } + + return 0; +} + +static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data) +{ + struct l2cap_conn_rsp *rsp = (struct l2cap_conn_rsp *) data; + u16 scid, dcid, result, status; + struct sock *sk; + u8 req[128]; + + scid = __le16_to_cpu(rsp->scid); + dcid = __le16_to_cpu(rsp->dcid); + result = __le16_to_cpu(rsp->result); + status = __le16_to_cpu(rsp->status); + + BT_DBG("dcid 0x%4.4x scid 0x%4.4x result 0x%2.2x status 0x%2.2x", dcid, scid, result, status); + + if (scid) { + sk = l2cap_get_chan_by_scid(&conn->chan_list, scid); + if (!sk) + return -EFAULT; + } else { + sk = l2cap_get_chan_by_ident(&conn->chan_list, cmd->ident); + if (!sk) + return -EFAULT; + } + + switch (result) { + case L2CAP_CR_SUCCESS: + sk->sk_state = BT_CONFIG; + l2cap_pi(sk)->ident = 0; + l2cap_pi(sk)->dcid = dcid; + l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_CONNECT_PEND; + + if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) + break; + + l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; + + l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, + l2cap_build_conf_req(sk, req), req); + l2cap_pi(sk)->num_conf_req++; + break; + + case L2CAP_CR_PEND: 
+ l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND; + break; + + default: + /* don't delete l2cap channel if sk is owned by user */ + if (sock_owned_by_user(sk)) { + sk->sk_state = BT_DISCONN; + l2cap_sock_clear_timer(sk); + l2cap_sock_set_timer(sk, HZ / 5); + break; + } + + l2cap_chan_del(sk, ECONNREFUSED); + break; + } + + bh_unlock_sock(sk); + return 0; +} + +static inline void set_default_fcs(struct l2cap_pinfo *pi) +{ + /* FCS is enabled only in ERTM or streaming mode, if one or both + * sides request it. + */ + if (pi->mode != L2CAP_MODE_ERTM && pi->mode != L2CAP_MODE_STREAMING) + pi->fcs = L2CAP_FCS_NONE; + else if (!(pi->conf_state & L2CAP_CONF_NO_FCS_RECV)) + pi->fcs = L2CAP_FCS_CRC16; +} + +static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) +{ + struct l2cap_conf_req *req = (struct l2cap_conf_req *) data; + u16 dcid, flags; + u8 rsp[64]; + struct sock *sk; + int len; + + dcid = __le16_to_cpu(req->dcid); + flags = __le16_to_cpu(req->flags); + + BT_DBG("dcid 0x%4.4x flags 0x%2.2x", dcid, flags); + + sk = l2cap_get_chan_by_scid(&conn->chan_list, dcid); + if (!sk) + return -ENOENT; + + if (sk->sk_state != BT_CONFIG) { + struct l2cap_cmd_rej rej; + + rej.reason = cpu_to_le16(0x0002); + l2cap_send_cmd(conn, cmd->ident, L2CAP_COMMAND_REJ, + sizeof(rej), &rej); + goto unlock; + } + + /* Reject if config buffer is too small. */ + len = cmd_len - sizeof(*req); + if (l2cap_pi(sk)->conf_len + len > sizeof(l2cap_pi(sk)->conf_req)) { + l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, + l2cap_build_conf_rsp(sk, rsp, + L2CAP_CONF_REJECT, flags), rsp); + goto unlock; + } + + /* Store config. */ + memcpy(l2cap_pi(sk)->conf_req + l2cap_pi(sk)->conf_len, req->data, len); + l2cap_pi(sk)->conf_len += len; + + if (flags & 0x0001) { + /* Incomplete config. Send empty response. 
*/ + l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, + l2cap_build_conf_rsp(sk, rsp, + L2CAP_CONF_SUCCESS, 0x0001), rsp); + goto unlock; + } + + /* Complete config. */ + len = l2cap_parse_conf_req(sk, rsp); + if (len < 0) { + l2cap_send_disconn_req(conn, sk, ECONNRESET); + goto unlock; + } + + l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, len, rsp); + l2cap_pi(sk)->num_conf_rsp++; + + /* Reset config buffer. */ + l2cap_pi(sk)->conf_len = 0; + + if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_OUTPUT_DONE)) + goto unlock; + + if (l2cap_pi(sk)->conf_state & L2CAP_CONF_INPUT_DONE) { + set_default_fcs(l2cap_pi(sk)); + + sk->sk_state = BT_CONNECTED; + + l2cap_pi(sk)->next_tx_seq = 0; + l2cap_pi(sk)->expected_tx_seq = 0; + __skb_queue_head_init(TX_QUEUE(sk)); + if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) + l2cap_ertm_init(sk); + + l2cap_chan_ready(sk); + goto unlock; + } + + if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT)) { + u8 buf[64]; + l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; + l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, + l2cap_build_conf_req(sk, buf), buf); + l2cap_pi(sk)->num_conf_req++; + } + +unlock: + bh_unlock_sock(sk); + return 0; +} + +static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data) +{ + struct l2cap_conf_rsp *rsp = (struct l2cap_conf_rsp *)data; + u16 scid, flags, result; + struct sock *sk; + int len = cmd->len - sizeof(*rsp); + + scid = __le16_to_cpu(rsp->scid); + flags = __le16_to_cpu(rsp->flags); + result = __le16_to_cpu(rsp->result); + + BT_DBG("scid 0x%4.4x flags 0x%2.2x result 0x%2.2x", + scid, flags, result); + + sk = l2cap_get_chan_by_scid(&conn->chan_list, scid); + if (!sk) + return 0; + + switch (result) { + case L2CAP_CONF_SUCCESS: + l2cap_conf_rfc_get(sk, rsp->data, len); + break; + + case L2CAP_CONF_UNACCEPT: + if (l2cap_pi(sk)->num_conf_rsp <= L2CAP_CONF_MAX_CONF_RSP) { + char req[64]; + + if (len > sizeof(req) - sizeof(struct l2cap_conf_req)) { + 
l2cap_send_disconn_req(conn, sk, ECONNRESET); + goto done; + } + + /* throw out any old stored conf requests */ + result = L2CAP_CONF_SUCCESS; + len = l2cap_parse_conf_rsp(sk, rsp->data, + len, req, &result); + if (len < 0) { + l2cap_send_disconn_req(conn, sk, ECONNRESET); + goto done; + } + + l2cap_send_cmd(conn, l2cap_get_ident(conn), + L2CAP_CONF_REQ, len, req); + l2cap_pi(sk)->num_conf_req++; + if (result != L2CAP_CONF_SUCCESS) + goto done; + break; + } + + default: + sk->sk_err = ECONNRESET; + l2cap_sock_set_timer(sk, HZ * 5); + l2cap_send_disconn_req(conn, sk, ECONNRESET); + goto done; + } + + if (flags & 0x01) + goto done; + + l2cap_pi(sk)->conf_state |= L2CAP_CONF_INPUT_DONE; + + if (l2cap_pi(sk)->conf_state & L2CAP_CONF_OUTPUT_DONE) { + set_default_fcs(l2cap_pi(sk)); + + sk->sk_state = BT_CONNECTED; + l2cap_pi(sk)->next_tx_seq = 0; + l2cap_pi(sk)->expected_tx_seq = 0; + __skb_queue_head_init(TX_QUEUE(sk)); + if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) + l2cap_ertm_init(sk); + + l2cap_chan_ready(sk); + } + +done: + bh_unlock_sock(sk); + return 0; +} + +static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data) +{ + struct l2cap_disconn_req *req = (struct l2cap_disconn_req *) data; + struct l2cap_disconn_rsp rsp; + u16 dcid, scid; + struct sock *sk; + + scid = __le16_to_cpu(req->scid); + dcid = __le16_to_cpu(req->dcid); + + BT_DBG("scid 0x%4.4x dcid 0x%4.4x", scid, dcid); + + sk = l2cap_get_chan_by_scid(&conn->chan_list, dcid); + if (!sk) + return 0; + + rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); + rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); + l2cap_send_cmd(conn, cmd->ident, L2CAP_DISCONN_RSP, sizeof(rsp), &rsp); + + sk->sk_shutdown = SHUTDOWN_MASK; + + /* don't delete l2cap channel if sk is owned by user */ + if (sock_owned_by_user(sk)) { + sk->sk_state = BT_DISCONN; + l2cap_sock_clear_timer(sk); + l2cap_sock_set_timer(sk, HZ / 5); + bh_unlock_sock(sk); + return 0; + } + + l2cap_chan_del(sk, ECONNRESET); + 
bh_unlock_sock(sk); + + l2cap_sock_kill(sk); + return 0; +} + +static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data) +{ + struct l2cap_disconn_rsp *rsp = (struct l2cap_disconn_rsp *) data; + u16 dcid, scid; + struct sock *sk; + + scid = __le16_to_cpu(rsp->scid); + dcid = __le16_to_cpu(rsp->dcid); + + BT_DBG("dcid 0x%4.4x scid 0x%4.4x", dcid, scid); + + sk = l2cap_get_chan_by_scid(&conn->chan_list, scid); + if (!sk) + return 0; + + /* don't delete l2cap channel if sk is owned by user */ + if (sock_owned_by_user(sk)) { + sk->sk_state = BT_DISCONN; + l2cap_sock_clear_timer(sk); + l2cap_sock_set_timer(sk, HZ / 5); + bh_unlock_sock(sk); + return 0; + } + + l2cap_chan_del(sk, 0); + bh_unlock_sock(sk); + + l2cap_sock_kill(sk); + return 0; +} + +static inline int l2cap_information_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data) +{ + struct l2cap_info_req *req = (struct l2cap_info_req *) data; + u16 type; + + type = __le16_to_cpu(req->type); + + BT_DBG("type 0x%4.4x", type); + + if (type == L2CAP_IT_FEAT_MASK) { + u8 buf[8]; + u32 feat_mask = l2cap_feat_mask; + struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf; + rsp->type = cpu_to_le16(L2CAP_IT_FEAT_MASK); + rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS); + if (!disable_ertm) + feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING + | L2CAP_FEAT_FCS; + put_unaligned_le32(feat_mask, rsp->data); + l2cap_send_cmd(conn, cmd->ident, + L2CAP_INFO_RSP, sizeof(buf), buf); + } else if (type == L2CAP_IT_FIXED_CHAN) { + u8 buf[12]; + struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf; + rsp->type = cpu_to_le16(L2CAP_IT_FIXED_CHAN); + rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS); + memcpy(buf + 4, l2cap_fixed_chan, 8); + l2cap_send_cmd(conn, cmd->ident, + L2CAP_INFO_RSP, sizeof(buf), buf); + } else { + struct l2cap_info_rsp rsp; + rsp.type = cpu_to_le16(type); + rsp.result = cpu_to_le16(L2CAP_IR_NOTSUPP); + l2cap_send_cmd(conn, cmd->ident, + L2CAP_INFO_RSP, 
sizeof(rsp), &rsp); + } + + return 0; +} + +static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data) +{ + struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) data; + u16 type, result; + + type = __le16_to_cpu(rsp->type); + result = __le16_to_cpu(rsp->result); + + BT_DBG("type 0x%4.4x result 0x%2.2x", type, result); + + del_timer(&conn->info_timer); + + if (result != L2CAP_IR_SUCCESS) { + conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; + conn->info_ident = 0; + + l2cap_conn_start(conn); + + return 0; + } + + if (type == L2CAP_IT_FEAT_MASK) { + conn->feat_mask = get_unaligned_le32(rsp->data); + + if (conn->feat_mask & L2CAP_FEAT_FIXED_CHAN) { + struct l2cap_info_req req; + req.type = cpu_to_le16(L2CAP_IT_FIXED_CHAN); + + conn->info_ident = l2cap_get_ident(conn); + + l2cap_send_cmd(conn, conn->info_ident, + L2CAP_INFO_REQ, sizeof(req), &req); + } else { + conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; + conn->info_ident = 0; + + l2cap_conn_start(conn); + } + } else if (type == L2CAP_IT_FIXED_CHAN) { + conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; + conn->info_ident = 0; + + l2cap_conn_start(conn); + } + + return 0; +} + +static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) +{ + u8 *data = skb->data; + int len = skb->len; + struct l2cap_cmd_hdr cmd; + int err = 0; + + l2cap_raw_recv(conn, skb); + + while (len >= L2CAP_CMD_HDR_SIZE) { + u16 cmd_len; + memcpy(&cmd, data, L2CAP_CMD_HDR_SIZE); + data += L2CAP_CMD_HDR_SIZE; + len -= L2CAP_CMD_HDR_SIZE; + + cmd_len = le16_to_cpu(cmd.len); + + BT_DBG("code 0x%2.2x len %d id 0x%2.2x", cmd.code, cmd_len, cmd.ident); + + if (cmd_len > len || !cmd.ident) { + BT_DBG("corrupted command"); + break; + } + + switch (cmd.code) { + case L2CAP_COMMAND_REJ: + l2cap_command_rej(conn, &cmd, data); + break; + + case L2CAP_CONN_REQ: + err = l2cap_connect_req(conn, &cmd, data); + break; + + case L2CAP_CONN_RSP: + err = l2cap_connect_rsp(conn, &cmd, data); 
+ break; + + case L2CAP_CONF_REQ: + err = l2cap_config_req(conn, &cmd, cmd_len, data); + break; + + case L2CAP_CONF_RSP: + err = l2cap_config_rsp(conn, &cmd, data); + break; + + case L2CAP_DISCONN_REQ: + err = l2cap_disconnect_req(conn, &cmd, data); + break; + + case L2CAP_DISCONN_RSP: + err = l2cap_disconnect_rsp(conn, &cmd, data); + break; + + case L2CAP_ECHO_REQ: + l2cap_send_cmd(conn, cmd.ident, L2CAP_ECHO_RSP, cmd_len, data); + break; + + case L2CAP_ECHO_RSP: + break; + + case L2CAP_INFO_REQ: + err = l2cap_information_req(conn, &cmd, data); + break; + + case L2CAP_INFO_RSP: + err = l2cap_information_rsp(conn, &cmd, data); + break; + + default: + BT_ERR("Unknown signaling command 0x%2.2x", cmd.code); + err = -EINVAL; + break; + } + + if (err) { + struct l2cap_cmd_rej rej; + BT_DBG("error %d", err); + + /* FIXME: Map err to a valid reason */ + rej.reason = cpu_to_le16(0); + l2cap_send_cmd(conn, cmd.ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej); + } + + data += cmd_len; + len -= cmd_len; + } + + kfree_skb(skb); +} + +static int l2cap_check_fcs(struct l2cap_pinfo *pi, struct sk_buff *skb) +{ + u16 our_fcs, rcv_fcs; + int hdr_size = L2CAP_HDR_SIZE + 2; + + if (pi->fcs == L2CAP_FCS_CRC16) { + skb_trim(skb, skb->len - 2); + rcv_fcs = get_unaligned_le16(skb->data + skb->len); + our_fcs = crc16(0, skb->data - hdr_size, skb->len + hdr_size); + + if (our_fcs != rcv_fcs) + return -EBADMSG; + } + return 0; +} + +static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + u16 control = 0; + + pi->frames_sent = 0; + + control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; + + if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) { + control |= L2CAP_SUPER_RCV_NOT_READY; + l2cap_send_sframe(pi, control); + pi->conn_state |= L2CAP_CONN_RNR_SENT; + } + + if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY) + l2cap_retransmit_frames(sk); + + l2cap_ertm_send(sk); + + if (!(pi->conn_state & L2CAP_CONN_LOCAL_BUSY) && + pi->frames_sent == 0) { + control 
|= L2CAP_SUPER_RCV_READY; + l2cap_send_sframe(pi, control); + } +} + +static int l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_seq, u8 sar) +{ + struct sk_buff *next_skb; + struct l2cap_pinfo *pi = l2cap_pi(sk); + int tx_seq_offset, next_tx_seq_offset; + + bt_cb(skb)->tx_seq = tx_seq; + bt_cb(skb)->sar = sar; + + next_skb = skb_peek(SREJ_QUEUE(sk)); + if (!next_skb) { + __skb_queue_tail(SREJ_QUEUE(sk), skb); + return 0; + } + + tx_seq_offset = (tx_seq - pi->buffer_seq) % 64; + if (tx_seq_offset < 0) + tx_seq_offset += 64; + + do { + if (bt_cb(next_skb)->tx_seq == tx_seq) + return -EINVAL; + + next_tx_seq_offset = (bt_cb(next_skb)->tx_seq - + pi->buffer_seq) % 64; + if (next_tx_seq_offset < 0) + next_tx_seq_offset += 64; + + if (next_tx_seq_offset > tx_seq_offset) { + __skb_queue_before(SREJ_QUEUE(sk), next_skb, skb); + return 0; + } + + if (skb_queue_is_last(SREJ_QUEUE(sk), next_skb)) + break; + + } while ((next_skb = skb_queue_next(SREJ_QUEUE(sk), next_skb))); + + __skb_queue_tail(SREJ_QUEUE(sk), skb); + + return 0; +} + +static int l2cap_ertm_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 control) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + struct sk_buff *_skb; + int err; + + switch (control & L2CAP_CTRL_SAR) { + case L2CAP_SDU_UNSEGMENTED: + if (pi->conn_state & L2CAP_CONN_SAR_SDU) + goto drop; + + err = sock_queue_rcv_skb(sk, skb); + if (!err) + return err; + + break; + + case L2CAP_SDU_START: + if (pi->conn_state & L2CAP_CONN_SAR_SDU) + goto drop; + + pi->sdu_len = get_unaligned_le16(skb->data); + + if (pi->sdu_len > pi->imtu) + goto disconnect; + + pi->sdu = bt_skb_alloc(pi->sdu_len, GFP_ATOMIC); + if (!pi->sdu) + return -ENOMEM; + + /* pull sdu_len bytes only after alloc, because of Local Busy + * condition we have to be sure that this will be executed + * only once, i.e., when alloc does not fail */ + skb_pull(skb, 2); + + memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len); + + pi->conn_state |= L2CAP_CONN_SAR_SDU; + 
pi->partial_sdu_len = skb->len; + break; + + case L2CAP_SDU_CONTINUE: + if (!(pi->conn_state & L2CAP_CONN_SAR_SDU)) + goto disconnect; + + if (!pi->sdu) + goto disconnect; + + pi->partial_sdu_len += skb->len; + if (pi->partial_sdu_len > pi->sdu_len) + goto drop; + + memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len); + + break; + + case L2CAP_SDU_END: + if (!(pi->conn_state & L2CAP_CONN_SAR_SDU)) + goto disconnect; + + if (!pi->sdu) + goto disconnect; + + if (!(pi->conn_state & L2CAP_CONN_SAR_RETRY)) { + pi->partial_sdu_len += skb->len; + + if (pi->partial_sdu_len > pi->imtu) + goto drop; + + if (pi->partial_sdu_len != pi->sdu_len) + goto drop; + + memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len); + } + + _skb = skb_clone(pi->sdu, GFP_ATOMIC); + if (!_skb) { + pi->conn_state |= L2CAP_CONN_SAR_RETRY; + return -ENOMEM; + } + + err = sock_queue_rcv_skb(sk, _skb); + if (err < 0) { + kfree_skb(_skb); + pi->conn_state |= L2CAP_CONN_SAR_RETRY; + return err; + } + + pi->conn_state &= ~L2CAP_CONN_SAR_RETRY; + pi->conn_state &= ~L2CAP_CONN_SAR_SDU; + + kfree_skb(pi->sdu); + break; + } + + kfree_skb(skb); + return 0; + +drop: + kfree_skb(pi->sdu); + pi->sdu = NULL; + +disconnect: + l2cap_send_disconn_req(pi->conn, sk, ECONNRESET); + kfree_skb(skb); + return 0; +} + +static int l2cap_try_push_rx_skb(struct sock *sk) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + struct sk_buff *skb; + u16 control; + int err; + + while ((skb = skb_dequeue(BUSY_QUEUE(sk)))) { + control = bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT; + err = l2cap_ertm_reassembly_sdu(sk, skb, control); + if (err < 0) { + skb_queue_head(BUSY_QUEUE(sk), skb); + return -EBUSY; + } + + pi->buffer_seq = (pi->buffer_seq + 1) % 64; + } + + if (!(pi->conn_state & L2CAP_CONN_RNR_SENT)) + goto done; + + control = pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; + control |= L2CAP_SUPER_RCV_READY | L2CAP_CTRL_POLL; + l2cap_send_sframe(pi, control); + l2cap_pi(sk)->retry_count = 1; + + del_timer(&pi->retrans_timer); + 
__mod_monitor_timer(); + + l2cap_pi(sk)->conn_state |= L2CAP_CONN_WAIT_F; + +done: + pi->conn_state &= ~L2CAP_CONN_LOCAL_BUSY; + pi->conn_state &= ~L2CAP_CONN_RNR_SENT; + + BT_DBG("sk %p, Exit local busy", sk); + + return 0; +} + +static void l2cap_busy_work(struct work_struct *work) +{ + DECLARE_WAITQUEUE(wait, current); + struct l2cap_pinfo *pi = + container_of(work, struct l2cap_pinfo, busy_work); + struct sock *sk = (struct sock *)pi; + int n_tries = 0, timeo = HZ/5, err; + struct sk_buff *skb; + + lock_sock(sk); + + add_wait_queue(sk_sleep(sk), &wait); + while ((skb = skb_peek(BUSY_QUEUE(sk)))) { + set_current_state(TASK_INTERRUPTIBLE); + + if (n_tries++ > L2CAP_LOCAL_BUSY_TRIES) { + err = -EBUSY; + l2cap_send_disconn_req(pi->conn, sk, EBUSY); + break; + } + + if (!timeo) + timeo = HZ/5; + + if (signal_pending(current)) { + err = sock_intr_errno(timeo); + break; + } + + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); + + err = sock_error(sk); + if (err) + break; + + if (l2cap_try_push_rx_skb(sk) == 0) + break; + } + + set_current_state(TASK_RUNNING); + remove_wait_queue(sk_sleep(sk), &wait); + + release_sock(sk); +} + +static int l2cap_push_rx_skb(struct sock *sk, struct sk_buff *skb, u16 control) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + int sctrl, err; + + if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) { + bt_cb(skb)->sar = control >> L2CAP_CTRL_SAR_SHIFT; + __skb_queue_tail(BUSY_QUEUE(sk), skb); + return l2cap_try_push_rx_skb(sk); + + + } + + err = l2cap_ertm_reassembly_sdu(sk, skb, control); + if (err >= 0) { + pi->buffer_seq = (pi->buffer_seq + 1) % 64; + return err; + } + + /* Busy Condition */ + BT_DBG("sk %p, Enter local busy", sk); + + pi->conn_state |= L2CAP_CONN_LOCAL_BUSY; + bt_cb(skb)->sar = control >> L2CAP_CTRL_SAR_SHIFT; + __skb_queue_tail(BUSY_QUEUE(sk), skb); + + sctrl = pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; + sctrl |= L2CAP_SUPER_RCV_NOT_READY; + l2cap_send_sframe(pi, sctrl); + + pi->conn_state |= 
L2CAP_CONN_RNR_SENT; + + del_timer(&pi->ack_timer); + + queue_work(_busy_wq, &pi->busy_work); + + return err; +} + +static int l2cap_streaming_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 control) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + struct sk_buff *_skb; + int err = -EINVAL; + + /* + * TODO: We have to notify the userland if some data is lost with the + * Streaming Mode. + */ + + switch (control & L2CAP_CTRL_SAR) { + case L2CAP_SDU_UNSEGMENTED: + if (pi->conn_state & L2CAP_CONN_SAR_SDU) { + kfree_skb(pi->sdu); + break; + } + + err = sock_queue_rcv_skb(sk, skb); + if (!err) + return 0; + + break; + + case L2CAP_SDU_START: + if (pi->conn_state & L2CAP_CONN_SAR_SDU) { + kfree_skb(pi->sdu); + break; + } + + pi->sdu_len = get_unaligned_le16(skb->data); + skb_pull(skb, 2); + + if (pi->sdu_len > pi->imtu) { + err = -EMSGSIZE; + break; + } + + pi->sdu = bt_skb_alloc(pi->sdu_len, GFP_ATOMIC); + if (!pi->sdu) { + err = -ENOMEM; + break; + } + + memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len); + + pi->conn_state |= L2CAP_CONN_SAR_SDU; + pi->partial_sdu_len = skb->len; + err = 0; + break; + + case L2CAP_SDU_CONTINUE: + if (!(pi->conn_state & L2CAP_CONN_SAR_SDU)) + break; + + memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len); + + pi->partial_sdu_len += skb->len; + if (pi->partial_sdu_len > pi->sdu_len) + kfree_skb(pi->sdu); + else + err = 0; + + break; + + case L2CAP_SDU_END: + if (!(pi->conn_state & L2CAP_CONN_SAR_SDU)) + break; + + memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len); + + pi->conn_state &= ~L2CAP_CONN_SAR_SDU; + pi->partial_sdu_len += skb->len; + + if (pi->partial_sdu_len > pi->imtu) + goto drop; + + if (pi->partial_sdu_len == pi->sdu_len) { + _skb = skb_clone(pi->sdu, GFP_ATOMIC); + err = sock_queue_rcv_skb(sk, _skb); + if (err < 0) + kfree_skb(_skb); + } + err = 0; + +drop: + kfree_skb(pi->sdu); + break; + } + + kfree_skb(skb); + return err; +} + +static void l2cap_check_srej_gap(struct sock *sk, u8 tx_seq) +{ + struct 
sk_buff *skb; + u16 control; + + while ((skb = skb_peek(SREJ_QUEUE(sk)))) { + if (bt_cb(skb)->tx_seq != tx_seq) + break; + + skb = skb_dequeue(SREJ_QUEUE(sk)); + control = bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT; + l2cap_ertm_reassembly_sdu(sk, skb, control); + l2cap_pi(sk)->buffer_seq_srej = + (l2cap_pi(sk)->buffer_seq_srej + 1) % 64; + tx_seq = (tx_seq + 1) % 64; + } +} + +static void l2cap_resend_srejframe(struct sock *sk, u8 tx_seq) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + struct srej_list *l, *tmp; + u16 control; + + list_for_each_entry_safe(l, tmp, SREJ_LIST(sk), list) { + if (l->tx_seq == tx_seq) { + list_del(&l->list); + kfree(l); + return; + } + control = L2CAP_SUPER_SELECT_REJECT; + control |= l->tx_seq << L2CAP_CTRL_REQSEQ_SHIFT; + l2cap_send_sframe(pi, control); + list_del(&l->list); + list_add_tail(&l->list, SREJ_LIST(sk)); + } +} + +static void l2cap_send_srejframe(struct sock *sk, u8 tx_seq) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + struct srej_list *new; + u16 control; + + while (tx_seq != pi->expected_tx_seq) { + control = L2CAP_SUPER_SELECT_REJECT; + control |= pi->expected_tx_seq << L2CAP_CTRL_REQSEQ_SHIFT; + l2cap_send_sframe(pi, control); + + new = kzalloc(sizeof(struct srej_list), GFP_ATOMIC); + new->tx_seq = pi->expected_tx_seq; + pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64; + list_add_tail(&new->list, SREJ_LIST(sk)); + } + pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64; +} + +static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, struct sk_buff *skb) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + u8 tx_seq = __get_txseq(rx_control); + u8 req_seq = __get_reqseq(rx_control); + u8 sar = rx_control >> L2CAP_CTRL_SAR_SHIFT; + int tx_seq_offset, expected_tx_seq_offset; + int num_to_ack = (pi->tx_win/6) + 1; + int err = 0; + + BT_DBG("sk %p len %d tx_seq %d rx_control 0x%4.4x", sk, skb->len, tx_seq, + rx_control); + + if (L2CAP_CTRL_FINAL & rx_control && + l2cap_pi(sk)->conn_state & L2CAP_CONN_WAIT_F) { 
+ del_timer(&pi->monitor_timer); + if (pi->unacked_frames > 0) + __mod_retrans_timer(); + pi->conn_state &= ~L2CAP_CONN_WAIT_F; + } + + pi->expected_ack_seq = req_seq; + l2cap_drop_acked_frames(sk); + + if (tx_seq == pi->expected_tx_seq) + goto expected; + + tx_seq_offset = (tx_seq - pi->buffer_seq) % 64; + if (tx_seq_offset < 0) + tx_seq_offset += 64; + + /* invalid tx_seq */ + if (tx_seq_offset >= pi->tx_win) { + l2cap_send_disconn_req(pi->conn, sk, ECONNRESET); + goto drop; + } + + if (pi->conn_state == L2CAP_CONN_LOCAL_BUSY) + goto drop; + + if (pi->conn_state & L2CAP_CONN_SREJ_SENT) { + struct srej_list *first; + + first = list_first_entry(SREJ_LIST(sk), + struct srej_list, list); + if (tx_seq == first->tx_seq) { + l2cap_add_to_srej_queue(sk, skb, tx_seq, sar); + l2cap_check_srej_gap(sk, tx_seq); + + list_del(&first->list); + kfree(first); + + if (list_empty(SREJ_LIST(sk))) { + pi->buffer_seq = pi->buffer_seq_srej; + pi->conn_state &= ~L2CAP_CONN_SREJ_SENT; + l2cap_send_ack(pi); + BT_DBG("sk %p, Exit SREJ_SENT", sk); + } + } else { + struct srej_list *l; + + /* duplicated tx_seq */ + if (l2cap_add_to_srej_queue(sk, skb, tx_seq, sar) < 0) + goto drop; + + list_for_each_entry(l, SREJ_LIST(sk), list) { + if (l->tx_seq == tx_seq) { + l2cap_resend_srejframe(sk, tx_seq); + return 0; + } + } + l2cap_send_srejframe(sk, tx_seq); + } + } else { + expected_tx_seq_offset = + (pi->expected_tx_seq - pi->buffer_seq) % 64; + if (expected_tx_seq_offset < 0) + expected_tx_seq_offset += 64; + + /* duplicated tx_seq */ + if (tx_seq_offset < expected_tx_seq_offset) + goto drop; + + pi->conn_state |= L2CAP_CONN_SREJ_SENT; + + BT_DBG("sk %p, Enter SREJ", sk); + + INIT_LIST_HEAD(SREJ_LIST(sk)); + pi->buffer_seq_srej = pi->buffer_seq; + + __skb_queue_head_init(SREJ_QUEUE(sk)); + __skb_queue_head_init(BUSY_QUEUE(sk)); + l2cap_add_to_srej_queue(sk, skb, tx_seq, sar); + + pi->conn_state |= L2CAP_CONN_SEND_PBIT; + + l2cap_send_srejframe(sk, tx_seq); + + del_timer(&pi->ack_timer); + } + 
return 0; + +expected: + pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64; + + if (pi->conn_state & L2CAP_CONN_SREJ_SENT) { + bt_cb(skb)->tx_seq = tx_seq; + bt_cb(skb)->sar = sar; + __skb_queue_tail(SREJ_QUEUE(sk), skb); + return 0; + } + + err = l2cap_push_rx_skb(sk, skb, rx_control); + if (err < 0) + return 0; + + if (rx_control & L2CAP_CTRL_FINAL) { + if (pi->conn_state & L2CAP_CONN_REJ_ACT) + pi->conn_state &= ~L2CAP_CONN_REJ_ACT; + else + l2cap_retransmit_frames(sk); + } + + __mod_ack_timer(); + + pi->num_acked = (pi->num_acked + 1) % num_to_ack; + if (pi->num_acked == num_to_ack - 1) + l2cap_send_ack(pi); + + return 0; + +drop: + kfree_skb(skb); + return 0; +} + +static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + + BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, __get_reqseq(rx_control), + rx_control); + + pi->expected_ack_seq = __get_reqseq(rx_control); + l2cap_drop_acked_frames(sk); + + if (rx_control & L2CAP_CTRL_POLL) { + pi->conn_state |= L2CAP_CONN_SEND_FBIT; + if (pi->conn_state & L2CAP_CONN_SREJ_SENT) { + if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) && + (pi->unacked_frames > 0)) + __mod_retrans_timer(); + + pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; + l2cap_send_srejtail(sk); + } else { + l2cap_send_i_or_rr_or_rnr(sk); + } + + } else if (rx_control & L2CAP_CTRL_FINAL) { + pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; + + if (pi->conn_state & L2CAP_CONN_REJ_ACT) + pi->conn_state &= ~L2CAP_CONN_REJ_ACT; + else + l2cap_retransmit_frames(sk); + + } else { + if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) && + (pi->unacked_frames > 0)) + __mod_retrans_timer(); + + pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; + if (pi->conn_state & L2CAP_CONN_SREJ_SENT) + l2cap_send_ack(pi); + else + l2cap_ertm_send(sk); + } +} + +static inline void l2cap_data_channel_rejframe(struct sock *sk, u16 rx_control) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + u8 tx_seq = __get_reqseq(rx_control); + + 
BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, tx_seq, rx_control); + + pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; + + pi->expected_ack_seq = tx_seq; + l2cap_drop_acked_frames(sk); + + if (rx_control & L2CAP_CTRL_FINAL) { + if (pi->conn_state & L2CAP_CONN_REJ_ACT) + pi->conn_state &= ~L2CAP_CONN_REJ_ACT; + else + l2cap_retransmit_frames(sk); + } else { + l2cap_retransmit_frames(sk); + + if (pi->conn_state & L2CAP_CONN_WAIT_F) + pi->conn_state |= L2CAP_CONN_REJ_ACT; + } +} +static inline void l2cap_data_channel_srejframe(struct sock *sk, u16 rx_control) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + u8 tx_seq = __get_reqseq(rx_control); + + BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, tx_seq, rx_control); + + pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; + + if (rx_control & L2CAP_CTRL_POLL) { + pi->expected_ack_seq = tx_seq; + l2cap_drop_acked_frames(sk); + + pi->conn_state |= L2CAP_CONN_SEND_FBIT; + l2cap_retransmit_one_frame(sk, tx_seq); + + l2cap_ertm_send(sk); + + if (pi->conn_state & L2CAP_CONN_WAIT_F) { + pi->srej_save_reqseq = tx_seq; + pi->conn_state |= L2CAP_CONN_SREJ_ACT; + } + } else if (rx_control & L2CAP_CTRL_FINAL) { + if ((pi->conn_state & L2CAP_CONN_SREJ_ACT) && + pi->srej_save_reqseq == tx_seq) + pi->conn_state &= ~L2CAP_CONN_SREJ_ACT; + else + l2cap_retransmit_one_frame(sk, tx_seq); + } else { + l2cap_retransmit_one_frame(sk, tx_seq); + if (pi->conn_state & L2CAP_CONN_WAIT_F) { + pi->srej_save_reqseq = tx_seq; + pi->conn_state |= L2CAP_CONN_SREJ_ACT; + } + } +} + +static inline void l2cap_data_channel_rnrframe(struct sock *sk, u16 rx_control) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + u8 tx_seq = __get_reqseq(rx_control); + + BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, tx_seq, rx_control); + + pi->conn_state |= L2CAP_CONN_REMOTE_BUSY; + pi->expected_ack_seq = tx_seq; + l2cap_drop_acked_frames(sk); + + if (rx_control & L2CAP_CTRL_POLL) + pi->conn_state |= L2CAP_CONN_SEND_FBIT; + + if (!(pi->conn_state & L2CAP_CONN_SREJ_SENT)) { + 
del_timer(&pi->retrans_timer); + if (rx_control & L2CAP_CTRL_POLL) + l2cap_send_rr_or_rnr(pi, L2CAP_CTRL_FINAL); + return; + } + + if (rx_control & L2CAP_CTRL_POLL) + l2cap_send_srejtail(sk); + else + l2cap_send_sframe(pi, L2CAP_SUPER_RCV_READY); +} + +static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, struct sk_buff *skb) +{ + BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len); + + if (L2CAP_CTRL_FINAL & rx_control && + l2cap_pi(sk)->conn_state & L2CAP_CONN_WAIT_F) { + del_timer(&l2cap_pi(sk)->monitor_timer); + if (l2cap_pi(sk)->unacked_frames > 0) + __mod_retrans_timer(); + l2cap_pi(sk)->conn_state &= ~L2CAP_CONN_WAIT_F; + } + + switch (rx_control & L2CAP_CTRL_SUPERVISE) { + case L2CAP_SUPER_RCV_READY: + l2cap_data_channel_rrframe(sk, rx_control); + break; + + case L2CAP_SUPER_REJECT: + l2cap_data_channel_rejframe(sk, rx_control); + break; + + case L2CAP_SUPER_SELECT_REJECT: + l2cap_data_channel_srejframe(sk, rx_control); + break; + + case L2CAP_SUPER_RCV_NOT_READY: + l2cap_data_channel_rnrframe(sk, rx_control); + break; + } + + kfree_skb(skb); + return 0; +} + +static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + u16 control; + u8 req_seq; + int len, next_tx_seq_offset, req_seq_offset; + + control = get_unaligned_le16(skb->data); + skb_pull(skb, 2); + len = skb->len; + + /* + * We can just drop the corrupted I-frame here. + * Receiver will miss it and start proper recovery + * procedures and ask retransmission. 
+ */ + if (l2cap_check_fcs(pi, skb)) + goto drop; + + if (__is_sar_start(control) && __is_iframe(control)) + len -= 2; + + if (pi->fcs == L2CAP_FCS_CRC16) + len -= 2; + + if (len > pi->mps) { + l2cap_send_disconn_req(pi->conn, sk, ECONNRESET); + goto drop; + } + + req_seq = __get_reqseq(control); + req_seq_offset = (req_seq - pi->expected_ack_seq) % 64; + if (req_seq_offset < 0) + req_seq_offset += 64; + + next_tx_seq_offset = + (pi->next_tx_seq - pi->expected_ack_seq) % 64; + if (next_tx_seq_offset < 0) + next_tx_seq_offset += 64; + + /* check for invalid req-seq */ + if (req_seq_offset > next_tx_seq_offset) { + l2cap_send_disconn_req(pi->conn, sk, ECONNRESET); + goto drop; + } + + if (__is_iframe(control)) { + if (len < 0) { + l2cap_send_disconn_req(pi->conn, sk, ECONNRESET); + goto drop; + } + + l2cap_data_channel_iframe(sk, control, skb); + } else { + if (len != 0) { + BT_ERR("%d", len); + l2cap_send_disconn_req(pi->conn, sk, ECONNRESET); + goto drop; + } + + l2cap_data_channel_sframe(sk, control, skb); + } + + return 0; + +drop: + kfree_skb(skb); + return 0; +} + +static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk_buff *skb) +{ + struct sock *sk; + struct l2cap_pinfo *pi; + u16 control; + u8 tx_seq; + int len; + + sk = l2cap_get_chan_by_scid(&conn->chan_list, cid); + if (!sk) { + BT_DBG("unknown cid 0x%4.4x", cid); + goto drop; + } + + pi = l2cap_pi(sk); + + BT_DBG("sk %p, len %d", sk, skb->len); + + if (sk->sk_state != BT_CONNECTED) + goto drop; + + switch (pi->mode) { + case L2CAP_MODE_BASIC: + /* If socket recv buffers overflows we drop data here + * which is *bad* because L2CAP has to be reliable. + * But we don't have any other choice. L2CAP doesn't + * provide flow control mechanism. 
*/ + + if (pi->imtu < skb->len) + goto drop; + + if (!sock_queue_rcv_skb(sk, skb)) + goto done; + break; + + case L2CAP_MODE_ERTM: + if (!sock_owned_by_user(sk)) { + l2cap_ertm_data_rcv(sk, skb); + } else { + if (sk_add_backlog(sk, skb)) + goto drop; + } + + goto done; + + case L2CAP_MODE_STREAMING: + control = get_unaligned_le16(skb->data); + skb_pull(skb, 2); + len = skb->len; + + if (l2cap_check_fcs(pi, skb)) + goto drop; + + if (__is_sar_start(control)) + len -= 2; + + if (pi->fcs == L2CAP_FCS_CRC16) + len -= 2; + + if (len > pi->mps || len < 0 || __is_sframe(control)) + goto drop; + + tx_seq = __get_txseq(control); + + if (pi->expected_tx_seq == tx_seq) + pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64; + else + pi->expected_tx_seq = (tx_seq + 1) % 64; + + l2cap_streaming_reassembly_sdu(sk, skb, control); + + goto done; + + default: + BT_DBG("sk %p: bad mode 0x%2.2x", sk, pi->mode); + break; + } + +drop: + kfree_skb(skb); + +done: + if (sk) + bh_unlock_sock(sk); + + return 0; +} + +static inline int l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, struct sk_buff *skb) +{ + struct sock *sk; + + sk = l2cap_get_sock_by_psm(0, psm, conn->src); + if (!sk) + goto drop; + + bh_lock_sock(sk); + + BT_DBG("sk %p, len %d", sk, skb->len); + + if (sk->sk_state != BT_BOUND && sk->sk_state != BT_CONNECTED) + goto drop; + + if (l2cap_pi(sk)->imtu < skb->len) + goto drop; + + if (!sock_queue_rcv_skb(sk, skb)) + goto done; + +drop: + kfree_skb(skb); + +done: + if (sk) + bh_unlock_sock(sk); + return 0; +} + +static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) +{ + struct l2cap_hdr *lh = (void *) skb->data; + u16 cid, len; + __le16 psm; + + skb_pull(skb, L2CAP_HDR_SIZE); + cid = __le16_to_cpu(lh->cid); + len = __le16_to_cpu(lh->len); + + if (len != skb->len) { + kfree_skb(skb); + return; + } + + BT_DBG("len %d, cid 0x%4.4x", len, cid); + + switch (cid) { + case L2CAP_CID_SIGNALING: + l2cap_sig_channel(conn, skb); + break; + + case 
L2CAP_CID_CONN_LESS: + psm = get_unaligned_le16(skb->data); + skb_pull(skb, 2); + l2cap_conless_channel(conn, psm, skb); + break; + + default: + l2cap_data_channel(conn, cid, skb); + break; + } +} + +/* ---- L2CAP interface with lower layer (HCI) ---- */ + +static int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) +{ + int exact = 0, lm1 = 0, lm2 = 0; + register struct sock *sk; + struct hlist_node *node; + + if (type != ACL_LINK) + return -EINVAL; + + BT_DBG("hdev %s, bdaddr %s", hdev->name, batostr(bdaddr)); + + /* Find listening sockets and check their link_mode */ + read_lock(&l2cap_sk_list.lock); + sk_for_each(sk, node, &l2cap_sk_list.head) { + if (sk->sk_state != BT_LISTEN) + continue; + + if (!bacmp(&bt_sk(sk)->src, &hdev->bdaddr)) { + lm1 |= HCI_LM_ACCEPT; + if (l2cap_pi(sk)->role_switch) + lm1 |= HCI_LM_MASTER; + exact++; + } else if (!bacmp(&bt_sk(sk)->src, BDADDR_ANY)) { + lm2 |= HCI_LM_ACCEPT; + if (l2cap_pi(sk)->role_switch) + lm2 |= HCI_LM_MASTER; + } + } + read_unlock(&l2cap_sk_list.lock); + + return exact ? 
lm1 : lm2; +} + +static int l2cap_connect_cfm(struct hci_conn *hcon, u8 status) +{ + struct l2cap_conn *conn; + + BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status); + + if (hcon->type != ACL_LINK) + return -EINVAL; + + if (!status) { + conn = l2cap_conn_add(hcon, status); + if (conn) + l2cap_conn_ready(conn); + } else + l2cap_conn_del(hcon, bt_err(status)); + + return 0; +} + +static int l2cap_disconn_ind(struct hci_conn *hcon) +{ + struct l2cap_conn *conn = hcon->l2cap_data; + + BT_DBG("hcon %p", hcon); + + if (hcon->type != ACL_LINK || !conn) + return 0x13; + + return conn->disc_reason; +} + +static int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) +{ + BT_DBG("hcon %p reason %d", hcon, reason); + + if (hcon->type != ACL_LINK) + return -EINVAL; + + l2cap_conn_del(hcon, bt_err(reason)); + + return 0; +} + +static inline void l2cap_check_encryption(struct sock *sk, u8 encrypt) +{ + if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM) + return; + + if (encrypt == 0x00) { + if (l2cap_pi(sk)->sec_level == BT_SECURITY_MEDIUM) { + l2cap_sock_clear_timer(sk); + l2cap_sock_set_timer(sk, HZ * 5); + } else if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH) + __l2cap_sock_close(sk, ECONNREFUSED); + } else { + if (l2cap_pi(sk)->sec_level == BT_SECURITY_MEDIUM) + l2cap_sock_clear_timer(sk); + } +} + +static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) +{ + struct l2cap_chan_list *l; + struct l2cap_conn *conn = hcon->l2cap_data; + struct sock *sk; + + if (!conn) + return 0; + + l = &conn->chan_list; + + BT_DBG("conn %p", conn); + + read_lock(&l->lock); + + for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { + bh_lock_sock(sk); + + if (l2cap_pi(sk)->conf_state & L2CAP_CONF_CONNECT_PEND) { + bh_unlock_sock(sk); + continue; + } + + if (!status && (sk->sk_state == BT_CONNECTED || + sk->sk_state == BT_CONFIG)) { + l2cap_check_encryption(sk, encrypt); + bh_unlock_sock(sk); + continue; + } + + if (sk->sk_state == 
BT_CONNECT) { + if (!status) { + struct l2cap_conn_req req; + req.scid = cpu_to_le16(l2cap_pi(sk)->scid); + req.psm = l2cap_pi(sk)->psm; + + l2cap_pi(sk)->ident = l2cap_get_ident(conn); + l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND; + + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + L2CAP_CONN_REQ, sizeof(req), &req); + } else { + l2cap_sock_clear_timer(sk); + l2cap_sock_set_timer(sk, HZ / 10); + } + } else if (sk->sk_state == BT_CONNECT2) { + struct l2cap_conn_rsp rsp; + __u16 result; + + if (!status) { + sk->sk_state = BT_CONFIG; + result = L2CAP_CR_SUCCESS; + } else { + sk->sk_state = BT_DISCONN; + l2cap_sock_set_timer(sk, HZ / 10); + result = L2CAP_CR_SEC_BLOCK; + } + + rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); + rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); + rsp.result = cpu_to_le16(result); + rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + L2CAP_CONN_RSP, sizeof(rsp), &rsp); + } + + bh_unlock_sock(sk); + } + + read_unlock(&l->lock); + + return 0; +} + +static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) +{ + struct l2cap_conn *conn = hcon->l2cap_data; + + if (!conn) + conn = l2cap_conn_add(hcon, 0); + + if (!conn) + goto drop; + + BT_DBG("conn %p len %d flags 0x%x", conn, skb->len, flags); + + if (!(flags & ACL_CONT)) { + struct l2cap_hdr *hdr; + struct sock *sk; + u16 cid; + int len; + + if (conn->rx_len) { + BT_ERR("Unexpected start frame (len %d)", skb->len); + kfree_skb(conn->rx_skb); + conn->rx_skb = NULL; + conn->rx_len = 0; + l2cap_conn_unreliable(conn, ECOMM); + } + + /* Start fragment always begin with Basic L2CAP header */ + if (skb->len < L2CAP_HDR_SIZE) { + BT_ERR("Frame is too short (len %d)", skb->len); + l2cap_conn_unreliable(conn, ECOMM); + goto drop; + } + + hdr = (struct l2cap_hdr *) skb->data; + len = __le16_to_cpu(hdr->len) + L2CAP_HDR_SIZE; + cid = __le16_to_cpu(hdr->cid); + + if (len == skb->len) { + /* Complete frame received */ + l2cap_recv_frame(conn, 
skb); + return 0; + } + + BT_DBG("Start: total len %d, frag len %d", len, skb->len); + + if (skb->len > len) { + BT_ERR("Frame is too long (len %d, expected len %d)", + skb->len, len); + l2cap_conn_unreliable(conn, ECOMM); + goto drop; + } + + sk = l2cap_get_chan_by_scid(&conn->chan_list, cid); + + if (sk && l2cap_pi(sk)->imtu < len - L2CAP_HDR_SIZE) { + BT_ERR("Frame exceeding recv MTU (len %d, MTU %d)", + len, l2cap_pi(sk)->imtu); + bh_unlock_sock(sk); + l2cap_conn_unreliable(conn, ECOMM); + goto drop; + } + + if (sk) + bh_unlock_sock(sk); + + /* Allocate skb for the complete frame (with header) */ + conn->rx_skb = bt_skb_alloc(len, GFP_ATOMIC); + if (!conn->rx_skb) + goto drop; + + skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len), + skb->len); + conn->rx_len = len - skb->len; + } else { + BT_DBG("Cont: frag len %d (expecting %d)", skb->len, conn->rx_len); + + if (!conn->rx_len) { + BT_ERR("Unexpected continuation frame (len %d)", skb->len); + l2cap_conn_unreliable(conn, ECOMM); + goto drop; + } + + if (skb->len > conn->rx_len) { + BT_ERR("Fragment is too long (len %d, expected %d)", + skb->len, conn->rx_len); + kfree_skb(conn->rx_skb); + conn->rx_skb = NULL; + conn->rx_len = 0; + l2cap_conn_unreliable(conn, ECOMM); + goto drop; + } + + skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len), + skb->len); + conn->rx_len -= skb->len; + + if (!conn->rx_len) { + /* Complete frame received */ + l2cap_recv_frame(conn, conn->rx_skb); + conn->rx_skb = NULL; + } + } + +drop: + kfree_skb(skb); + return 0; +} + +static int l2cap_debugfs_show(struct seq_file *f, void *p) +{ + struct sock *sk; + struct hlist_node *node; + + read_lock_bh(&l2cap_sk_list.lock); + + sk_for_each(sk, node, &l2cap_sk_list.head) { + struct l2cap_pinfo *pi = l2cap_pi(sk); + + seq_printf(f, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d %d\n", + batostr(&bt_sk(sk)->src), + batostr(&bt_sk(sk)->dst), + sk->sk_state, __le16_to_cpu(pi->psm), + pi->scid, pi->dcid, + pi->imtu, pi->omtu, 
pi->sec_level); + } + + read_unlock_bh(&l2cap_sk_list.lock); + + return 0; +} + +static int l2cap_debugfs_open(struct inode *inode, struct file *file) +{ + return single_open(file, l2cap_debugfs_show, inode->i_private); +} + +static const struct file_operations l2cap_debugfs_fops = { + .open = l2cap_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct dentry *l2cap_debugfs; + +static const struct proto_ops l2cap_sock_ops = { + .family = PF_BLUETOOTH, + .owner = THIS_MODULE, + .release = l2cap_sock_release, + .bind = l2cap_sock_bind, + .connect = l2cap_sock_connect, + .listen = l2cap_sock_listen, + .accept = l2cap_sock_accept, + .getname = l2cap_sock_getname, + .sendmsg = l2cap_sock_sendmsg, + .recvmsg = l2cap_sock_recvmsg, + .poll = bt_sock_poll, + .ioctl = bt_sock_ioctl, + .mmap = sock_no_mmap, + .socketpair = sock_no_socketpair, + .shutdown = l2cap_sock_shutdown, + .setsockopt = l2cap_sock_setsockopt, + .getsockopt = l2cap_sock_getsockopt +}; + +static const struct net_proto_family l2cap_sock_family_ops = { + .family = PF_BLUETOOTH, + .owner = THIS_MODULE, + .create = l2cap_sock_create, +}; + +static struct hci_proto l2cap_hci_proto = { + .name = "L2CAP", + .id = HCI_PROTO_L2CAP, + .connect_ind = l2cap_connect_ind, + .connect_cfm = l2cap_connect_cfm, + .disconn_ind = l2cap_disconn_ind, + .disconn_cfm = l2cap_disconn_cfm, + .security_cfm = l2cap_security_cfm, + .recv_acldata = l2cap_recv_acldata +}; + +static int __init l2cap_init(void) +{ + int err; + + err = proto_register(&l2cap_proto, 0); + if (err < 0) + return err; + + _busy_wq = create_singlethread_workqueue("l2cap"); + if (!_busy_wq) { + proto_unregister(&l2cap_proto); + return -ENOMEM; + } + + err = bt_sock_register(BTPROTO_L2CAP, &l2cap_sock_family_ops); + if (err < 0) { + BT_ERR("L2CAP socket registration failed"); + goto error; + } + + err = hci_register_proto(&l2cap_hci_proto); + if (err < 0) { + BT_ERR("L2CAP protocol registration failed"); + 
bt_sock_unregister(BTPROTO_L2CAP); + goto error; + } + + if (bt_debugfs) { + l2cap_debugfs = debugfs_create_file("l2cap", 0444, + bt_debugfs, NULL, &l2cap_debugfs_fops); + if (!l2cap_debugfs) + BT_ERR("Failed to create L2CAP debug file"); + } + + BT_INFO("L2CAP ver %s", VERSION); + BT_INFO("L2CAP socket layer initialized"); + + return 0; + +error: + destroy_workqueue(_busy_wq); + proto_unregister(&l2cap_proto); + return err; +} + +static void __exit l2cap_exit(void) +{ + debugfs_remove(l2cap_debugfs); + + flush_workqueue(_busy_wq); + destroy_workqueue(_busy_wq); + + if (bt_sock_unregister(BTPROTO_L2CAP) < 0) + BT_ERR("L2CAP socket unregistration failed"); + + if (hci_unregister_proto(&l2cap_hci_proto) < 0) + BT_ERR("L2CAP protocol unregistration failed"); + + proto_unregister(&l2cap_proto); +} + +void l2cap_load(void) +{ + /* Dummy function to trigger automatic L2CAP module loading by + * other modules that use L2CAP sockets but don't use any other + * symbols from it. */ +} +EXPORT_SYMBOL(l2cap_load); + +module_init(l2cap_init); +module_exit(l2cap_exit); + +module_param(disable_ertm, bool, 0644); +MODULE_PARM_DESC(disable_ertm, "Disable enhanced retransmission mode"); + +MODULE_AUTHOR("Marcel Holtmann "); +MODULE_DESCRIPTION("Bluetooth L2CAP ver " VERSION); +MODULE_VERSION(VERSION); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("bt-proto-0"); -- cgit v1.1 From bb58f747e519aba07a6f05a78d58cf8a0788e2d5 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Thu, 3 Feb 2011 20:50:35 -0200 Subject: Bluetooth: Initial work for L2CAP split. This patch tries to do the minimal to move l2cap_sock_create() and its dependencies to l2cap_sock.c. It create a API to initialize and cleanup the L2CAP sockets from l2cap_core.c through l2cap_init_sockets() and l2cap_cleanup_sockets(). Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/Makefile | 2 +- net/bluetooth/l2cap_core.c | 187 +++------------------------------------ net/bluetooth/l2cap_sock.c | 213 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 225 insertions(+), 177 deletions(-) create mode 100644 net/bluetooth/l2cap_sock.c (limited to 'net') diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index bf2945e..339b429 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -11,4 +11,4 @@ obj-$(CONFIG_BT_CMTP) += cmtp/ obj-$(CONFIG_BT_HIDP) += hidp/ bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o hci_sock.o hci_sysfs.o lib.o -l2cap-y := l2cap_core.o +l2cap-y := l2cap_core.o l2cap_sock.o diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 28d2954..af678ef 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -24,7 +24,7 @@ SOFTWARE IS DISCLAIMED. */ -/* Bluetooth L2CAP core and sockets. */ +/* Bluetooth L2CAP core. */ #include @@ -57,24 +57,20 @@ #define VERSION "2.15" -static int disable_ertm; +int disable_ertm; static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN; static u8 l2cap_fixed_chan[8] = { 0x02, }; -static const struct proto_ops l2cap_sock_ops; - static struct workqueue_struct *_busy_wq; -static struct bt_sock_list l2cap_sk_list = { +struct bt_sock_list l2cap_sk_list = { .lock = __RW_LOCK_UNLOCKED(l2cap_sk_list.lock) }; static void l2cap_busy_work(struct work_struct *work); -static void __l2cap_sock_close(struct sock *sk, int reason); static void l2cap_sock_close(struct sock *sk); -static void l2cap_sock_kill(struct sock *sk); static int l2cap_build_conf_req(struct sock *sk, void *data); static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, @@ -83,7 +79,7 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb); /* ---- L2CAP timers ---- */ -static void l2cap_sock_set_timer(struct sock *sk, long timeout) +void 
l2cap_sock_set_timer(struct sock *sk, long timeout) { BT_DBG("sk %p state %d timeout %ld", sk, sk->sk_state, timeout); sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout); @@ -95,39 +91,6 @@ static void l2cap_sock_clear_timer(struct sock *sk) sk_stop_timer(sk, &sk->sk_timer); } -static void l2cap_sock_timeout(unsigned long arg) -{ - struct sock *sk = (struct sock *) arg; - int reason; - - BT_DBG("sock %p state %d", sk, sk->sk_state); - - bh_lock_sock(sk); - - if (sock_owned_by_user(sk)) { - /* sk is owned by user. Try again later */ - l2cap_sock_set_timer(sk, HZ / 5); - bh_unlock_sock(sk); - sock_put(sk); - return; - } - - if (sk->sk_state == BT_CONNECTED || sk->sk_state == BT_CONFIG) - reason = ECONNREFUSED; - else if (sk->sk_state == BT_CONNECT && - l2cap_pi(sk)->sec_level != BT_SECURITY_SDP) - reason = ECONNREFUSED; - else - reason = ETIMEDOUT; - - __l2cap_sock_close(sk, reason); - - bh_unlock_sock(sk); - - l2cap_sock_kill(sk); - sock_put(sk); -} - /* ---- L2CAP channels ---- */ static struct sock *__l2cap_get_chan_by_dcid(struct l2cap_chan_list *l, u16 cid) { @@ -801,14 +764,6 @@ static struct sock *l2cap_get_sock_by_psm(int state, __le16 psm, bdaddr_t *src) return node ? sk : sk1; } -static void l2cap_sock_destruct(struct sock *sk) -{ - BT_DBG("sk %p", sk); - - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); -} - static void l2cap_sock_cleanup_listen(struct sock *parent) { struct sock *sk; @@ -826,7 +781,7 @@ static void l2cap_sock_cleanup_listen(struct sock *parent) /* Kill socket (only if zapped and orphan) * Must be called on unlocked socket. 
*/ -static void l2cap_sock_kill(struct sock *sk) +void l2cap_sock_kill(struct sock *sk) { if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket) return; @@ -839,7 +794,7 @@ static void l2cap_sock_kill(struct sock *sk) sock_put(sk); } -static void __l2cap_sock_close(struct sock *sk, int reason) +void __l2cap_sock_close(struct sock *sk, int reason) { BT_DBG("sk %p state %d socket %p", sk, sk->sk_state, sk->sk_socket); @@ -904,111 +859,6 @@ static void l2cap_sock_close(struct sock *sk) l2cap_sock_kill(sk); } -static void l2cap_sock_init(struct sock *sk, struct sock *parent) -{ - struct l2cap_pinfo *pi = l2cap_pi(sk); - - BT_DBG("sk %p", sk); - - if (parent) { - sk->sk_type = parent->sk_type; - bt_sk(sk)->defer_setup = bt_sk(parent)->defer_setup; - - pi->imtu = l2cap_pi(parent)->imtu; - pi->omtu = l2cap_pi(parent)->omtu; - pi->conf_state = l2cap_pi(parent)->conf_state; - pi->mode = l2cap_pi(parent)->mode; - pi->fcs = l2cap_pi(parent)->fcs; - pi->max_tx = l2cap_pi(parent)->max_tx; - pi->tx_win = l2cap_pi(parent)->tx_win; - pi->sec_level = l2cap_pi(parent)->sec_level; - pi->role_switch = l2cap_pi(parent)->role_switch; - pi->force_reliable = l2cap_pi(parent)->force_reliable; - pi->flushable = l2cap_pi(parent)->flushable; - } else { - pi->imtu = L2CAP_DEFAULT_MTU; - pi->omtu = 0; - if (!disable_ertm && sk->sk_type == SOCK_STREAM) { - pi->mode = L2CAP_MODE_ERTM; - pi->conf_state |= L2CAP_CONF_STATE2_DEVICE; - } else { - pi->mode = L2CAP_MODE_BASIC; - } - pi->max_tx = L2CAP_DEFAULT_MAX_TX; - pi->fcs = L2CAP_FCS_CRC16; - pi->tx_win = L2CAP_DEFAULT_TX_WINDOW; - pi->sec_level = BT_SECURITY_LOW; - pi->role_switch = 0; - pi->force_reliable = 0; - pi->flushable = BT_FLUSHABLE_OFF; - } - - /* Default config options */ - pi->conf_len = 0; - pi->flush_to = L2CAP_DEFAULT_FLUSH_TO; - skb_queue_head_init(TX_QUEUE(sk)); - skb_queue_head_init(SREJ_QUEUE(sk)); - skb_queue_head_init(BUSY_QUEUE(sk)); - INIT_LIST_HEAD(SREJ_LIST(sk)); -} - -static struct proto l2cap_proto = { - .name = "L2CAP", - 
.owner = THIS_MODULE, - .obj_size = sizeof(struct l2cap_pinfo) -}; - -static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) -{ - struct sock *sk; - - sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto); - if (!sk) - return NULL; - - sock_init_data(sock, sk); - INIT_LIST_HEAD(&bt_sk(sk)->accept_q); - - sk->sk_destruct = l2cap_sock_destruct; - sk->sk_sndtimeo = msecs_to_jiffies(L2CAP_CONN_TIMEOUT); - - sock_reset_flag(sk, SOCK_ZAPPED); - - sk->sk_protocol = proto; - sk->sk_state = BT_OPEN; - - setup_timer(&sk->sk_timer, l2cap_sock_timeout, (unsigned long) sk); - - bt_sock_link(&l2cap_sk_list, sk); - return sk; -} - -static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol, - int kern) -{ - struct sock *sk; - - BT_DBG("sock %p", sock); - - sock->state = SS_UNCONNECTED; - - if (sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM && - sock->type != SOCK_DGRAM && sock->type != SOCK_RAW) - return -ESOCKTNOSUPPORT; - - if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW)) - return -EPERM; - - sock->ops = &l2cap_sock_ops; - - sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC); - if (!sk) - return -ENOMEM; - - l2cap_sock_init(sk, NULL); - return 0; -} - static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) { struct sock *sk = sock->sk; @@ -4865,7 +4715,7 @@ static const struct file_operations l2cap_debugfs_fops = { static struct dentry *l2cap_debugfs; -static const struct proto_ops l2cap_sock_ops = { +const struct proto_ops l2cap_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = l2cap_sock_release, @@ -4885,12 +4735,6 @@ static const struct proto_ops l2cap_sock_ops = { .getsockopt = l2cap_sock_getsockopt }; -static const struct net_proto_family l2cap_sock_family_ops = { - .family = PF_BLUETOOTH, - .owner = THIS_MODULE, - .create = l2cap_sock_create, -}; - static struct hci_proto l2cap_hci_proto = { .name = "L2CAP", .id = HCI_PROTO_L2CAP, @@ 
-4906,19 +4750,13 @@ static int __init l2cap_init(void) { int err; - err = proto_register(&l2cap_proto, 0); + err = l2cap_init_sockets(); if (err < 0) return err; _busy_wq = create_singlethread_workqueue("l2cap"); if (!_busy_wq) { - proto_unregister(&l2cap_proto); - return -ENOMEM; - } - - err = bt_sock_register(BTPROTO_L2CAP, &l2cap_sock_family_ops); - if (err < 0) { - BT_ERR("L2CAP socket registration failed"); + err = -ENOMEM; goto error; } @@ -4943,7 +4781,7 @@ static int __init l2cap_init(void) error: destroy_workqueue(_busy_wq); - proto_unregister(&l2cap_proto); + l2cap_cleanup_sockets(); return err; } @@ -4954,13 +4792,10 @@ static void __exit l2cap_exit(void) flush_workqueue(_busy_wq); destroy_workqueue(_busy_wq); - if (bt_sock_unregister(BTPROTO_L2CAP) < 0) - BT_ERR("L2CAP socket unregistration failed"); - if (hci_unregister_proto(&l2cap_hci_proto) < 0) BT_ERR("L2CAP protocol unregistration failed"); - proto_unregister(&l2cap_proto); + l2cap_cleanup_sockets(); } void l2cap_load(void) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c new file mode 100644 index 0000000..6ea1894 --- /dev/null +++ b/net/bluetooth/l2cap_sock.c @@ -0,0 +1,213 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + Copyright (C) 2000-2001 Qualcomm Incorporated + Copyright (C) 2009-2010 Gustavo F. Padovan + Copyright (C) 2010 Google Inc. + + Written 2000,2001 by Maxim Krasnyansky + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. 
+ IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +/* Bluetooth L2CAP sockets. */ + +#include +#include + +static void l2cap_sock_timeout(unsigned long arg) +{ + struct sock *sk = (struct sock *) arg; + int reason; + + BT_DBG("sock %p state %d", sk, sk->sk_state); + + bh_lock_sock(sk); + + if (sock_owned_by_user(sk)) { + /* sk is owned by user. Try again later */ + l2cap_sock_set_timer(sk, HZ / 5); + bh_unlock_sock(sk); + sock_put(sk); + return; + } + + if (sk->sk_state == BT_CONNECTED || sk->sk_state == BT_CONFIG) + reason = ECONNREFUSED; + else if (sk->sk_state == BT_CONNECT && + l2cap_pi(sk)->sec_level != BT_SECURITY_SDP) + reason = ECONNREFUSED; + else + reason = ETIMEDOUT; + + __l2cap_sock_close(sk, reason); + + bh_unlock_sock(sk); + + l2cap_sock_kill(sk); + sock_put(sk); +} + + +static void l2cap_sock_destruct(struct sock *sk) +{ + BT_DBG("sk %p", sk); + + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&sk->sk_write_queue); +} + +void l2cap_sock_init(struct sock *sk, struct sock *parent) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + + BT_DBG("sk %p", sk); + + if (parent) { + sk->sk_type = parent->sk_type; + bt_sk(sk)->defer_setup = bt_sk(parent)->defer_setup; + + pi->imtu = l2cap_pi(parent)->imtu; + pi->omtu = l2cap_pi(parent)->omtu; + pi->conf_state = l2cap_pi(parent)->conf_state; + pi->mode = l2cap_pi(parent)->mode; + pi->fcs = l2cap_pi(parent)->fcs; + pi->max_tx = l2cap_pi(parent)->max_tx; + pi->tx_win = l2cap_pi(parent)->tx_win; + pi->sec_level = 
l2cap_pi(parent)->sec_level; + pi->role_switch = l2cap_pi(parent)->role_switch; + pi->force_reliable = l2cap_pi(parent)->force_reliable; + pi->flushable = l2cap_pi(parent)->flushable; + } else { + pi->imtu = L2CAP_DEFAULT_MTU; + pi->omtu = 0; + if (!disable_ertm && sk->sk_type == SOCK_STREAM) { + pi->mode = L2CAP_MODE_ERTM; + pi->conf_state |= L2CAP_CONF_STATE2_DEVICE; + } else { + pi->mode = L2CAP_MODE_BASIC; + } + pi->max_tx = L2CAP_DEFAULT_MAX_TX; + pi->fcs = L2CAP_FCS_CRC16; + pi->tx_win = L2CAP_DEFAULT_TX_WINDOW; + pi->sec_level = BT_SECURITY_LOW; + pi->role_switch = 0; + pi->force_reliable = 0; + pi->flushable = BT_FLUSHABLE_OFF; + } + + /* Default config options */ + pi->conf_len = 0; + pi->flush_to = L2CAP_DEFAULT_FLUSH_TO; + skb_queue_head_init(TX_QUEUE(sk)); + skb_queue_head_init(SREJ_QUEUE(sk)); + skb_queue_head_init(BUSY_QUEUE(sk)); + INIT_LIST_HEAD(SREJ_LIST(sk)); +} + +static struct proto l2cap_proto = { + .name = "L2CAP", + .owner = THIS_MODULE, + .obj_size = sizeof(struct l2cap_pinfo) +}; + +struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) +{ + struct sock *sk; + + sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto); + if (!sk) + return NULL; + + sock_init_data(sock, sk); + INIT_LIST_HEAD(&bt_sk(sk)->accept_q); + + sk->sk_destruct = l2cap_sock_destruct; + sk->sk_sndtimeo = msecs_to_jiffies(L2CAP_CONN_TIMEOUT); + + sock_reset_flag(sk, SOCK_ZAPPED); + + sk->sk_protocol = proto; + sk->sk_state = BT_OPEN; + + setup_timer(&sk->sk_timer, l2cap_sock_timeout, (unsigned long) sk); + + bt_sock_link(&l2cap_sk_list, sk); + return sk; +} + +static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol, + int kern) +{ + struct sock *sk; + + BT_DBG("sock %p", sock); + + sock->state = SS_UNCONNECTED; + + if (sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM && + sock->type != SOCK_DGRAM && sock->type != SOCK_RAW) + return -ESOCKTNOSUPPORT; + + if (sock->type == SOCK_RAW && !kern && 
!capable(CAP_NET_RAW)) + return -EPERM; + + sock->ops = &l2cap_sock_ops; + + sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC); + if (!sk) + return -ENOMEM; + + l2cap_sock_init(sk, NULL); + return 0; +} + +static const struct net_proto_family l2cap_sock_family_ops = { + .family = PF_BLUETOOTH, + .owner = THIS_MODULE, + .create = l2cap_sock_create, +}; + +int __init l2cap_init_sockets(void) +{ + int err; + + err = proto_register(&l2cap_proto, 0); + if (err < 0) + return err; + + err = bt_sock_register(BTPROTO_L2CAP, &l2cap_sock_family_ops); + if (err < 0) + goto error; + + BT_INFO("L2CAP socket layer initialized"); + + return 0; + +error: + BT_ERR("L2CAP socket registration failed"); + proto_unregister(&l2cap_proto); + return err; +} + +void l2cap_cleanup_sockets(void) +{ + if (bt_sock_unregister(BTPROTO_L2CAP) < 0) + BT_ERR("L2CAP socket unregistration failed"); + + proto_unregister(&l2cap_proto); +} -- cgit v1.1 From 65390587c7bcf8bb60b48387db766d8d7dfea982 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Fri, 4 Feb 2011 02:33:56 -0200 Subject: Bluetooth: move l2cap_sock_ops to l2cap_sock.c First step to move all l2cap_sock_ops function to l2cap_sock.c Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/l2cap_core.c | 42 +++++++++++------------------------------- net/bluetooth/l2cap_sock.c | 21 ++++++++++++++++++++- 2 files changed, 31 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index af678ef..74a3ea3 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -859,7 +859,7 @@ static void l2cap_sock_close(struct sock *sk) l2cap_sock_kill(sk); } -static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) +int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) { struct sock *sk = sock->sk; struct sockaddr_l2 la; @@ -983,7 +983,7 @@ done: return err; } -static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) +int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) { struct sock *sk = sock->sk; struct sockaddr_l2 la; @@ -1068,7 +1068,7 @@ done: return err; } -static int l2cap_sock_listen(struct socket *sock, int backlog) +int l2cap_sock_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; int err = 0; @@ -1127,7 +1127,7 @@ done: return err; } -static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int flags) +int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int flags) { DECLARE_WAITQUEUE(wait, current); struct sock *sk = sock->sk, *nsk; @@ -1183,7 +1183,7 @@ done: return err; } -static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer) +int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer) { struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr; struct sock *sk = sock->sk; @@ -1665,7 +1665,7 @@ static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, siz return size; } -static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) +int l2cap_sock_sendmsg(struct 
kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct l2cap_pinfo *pi = l2cap_pi(sk); @@ -1767,7 +1767,7 @@ done: return err; } -static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags) +int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct sock *sk = sock->sk; @@ -1894,7 +1894,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us return err; } -static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) +int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) { struct sock *sk = sock->sk; struct bt_security sec; @@ -2067,7 +2067,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __us return err; } -static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) +int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; struct bt_security sec; @@ -2128,7 +2128,7 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, ch return err; } -static int l2cap_sock_shutdown(struct socket *sock, int how) +int l2cap_sock_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; int err = 0; @@ -2159,7 +2159,7 @@ static int l2cap_sock_shutdown(struct socket *sock, int how) return err; } -static int l2cap_sock_release(struct socket *sock) +int l2cap_sock_release(struct socket *sock) { struct sock *sk = sock->sk; int err; @@ -4715,26 +4715,6 @@ static const struct file_operations l2cap_debugfs_fops = { static struct dentry *l2cap_debugfs; -const struct proto_ops l2cap_sock_ops = { - .family = PF_BLUETOOTH, - .owner = THIS_MODULE, - .release = l2cap_sock_release, - 
.bind = l2cap_sock_bind, - .connect = l2cap_sock_connect, - .listen = l2cap_sock_listen, - .accept = l2cap_sock_accept, - .getname = l2cap_sock_getname, - .sendmsg = l2cap_sock_sendmsg, - .recvmsg = l2cap_sock_recvmsg, - .poll = bt_sock_poll, - .ioctl = bt_sock_ioctl, - .mmap = sock_no_mmap, - .socketpair = sock_no_socketpair, - .shutdown = l2cap_sock_shutdown, - .setsockopt = l2cap_sock_setsockopt, - .getsockopt = l2cap_sock_getsockopt -}; - static struct hci_proto l2cap_hci_proto = { .name = "L2CAP", .id = HCI_PROTO_L2CAP, diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 6ea1894..c1455f7 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -62,7 +62,6 @@ static void l2cap_sock_timeout(unsigned long arg) sock_put(sk); } - static void l2cap_sock_destruct(struct sock *sk) { BT_DBG("sk %p", sk); @@ -176,6 +175,26 @@ static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol, return 0; } +const struct proto_ops l2cap_sock_ops = { + .family = PF_BLUETOOTH, + .owner = THIS_MODULE, + .release = l2cap_sock_release, + .bind = l2cap_sock_bind, + .connect = l2cap_sock_connect, + .listen = l2cap_sock_listen, + .accept = l2cap_sock_accept, + .getname = l2cap_sock_getname, + .sendmsg = l2cap_sock_sendmsg, + .recvmsg = l2cap_sock_recvmsg, + .poll = bt_sock_poll, + .ioctl = bt_sock_ioctl, + .mmap = sock_no_mmap, + .socketpair = sock_no_socketpair, + .shutdown = l2cap_sock_shutdown, + .setsockopt = l2cap_sock_setsockopt, + .getsockopt = l2cap_sock_getsockopt +}; + static const struct net_proto_family l2cap_sock_family_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, -- cgit v1.1 From 554f05bb8a0707dcc0ba4ea1dba1fb9970846ab5 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Fri, 4 Feb 2011 02:36:42 -0200 Subject: Bluetooth: move l2cap_sock_release() to l2cap_sock.c Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/l2cap_core.c | 17 ----------------- net/bluetooth/l2cap_sock.c | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 74a3ea3..5765a82 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -2159,23 +2159,6 @@ int l2cap_sock_shutdown(struct socket *sock, int how) return err; } -int l2cap_sock_release(struct socket *sock) -{ - struct sock *sk = sock->sk; - int err; - - BT_DBG("sock %p, sk %p", sock, sk); - - if (!sk) - return 0; - - err = l2cap_sock_shutdown(sock, 2); - - sock_orphan(sk); - l2cap_sock_kill(sk); - return err; -} - static void l2cap_chan_ready(struct sock *sk) { struct sock *parent = bt_sk(sk)->parent; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index c1455f7..20efd24 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -62,6 +62,23 @@ static void l2cap_sock_timeout(unsigned long arg) sock_put(sk); } +static int l2cap_sock_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + int err; + + BT_DBG("sock %p, sk %p", sock, sk); + + if (!sk) + return 0; + + err = l2cap_sock_shutdown(sock, 2); + + sock_orphan(sk); + l2cap_sock_kill(sk); + return err; +} + static void l2cap_sock_destruct(struct sock *sk) { BT_DBG("sk %p", sk); -- cgit v1.1 From af6bcd8205ac06fa1de98b2b28303157fb9c3dfc Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Fri, 4 Feb 2011 02:40:28 -0200 Subject: Bluetooth: move l2cap_sock_bind()/listen() to l2cap_sock.c Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/l2cap_core.c | 134 -------------------------------------------- net/bluetooth/l2cap_sock.c | 135 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 135 insertions(+), 134 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 5765a82..6af3872 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -722,17 +722,6 @@ static inline void l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, stru } /* ---- Socket interface ---- */ -static struct sock *__l2cap_get_sock_by_addr(__le16 psm, bdaddr_t *src) -{ - struct sock *sk; - struct hlist_node *node; - sk_for_each(sk, node, &l2cap_sk_list.head) - if (l2cap_pi(sk)->sport == psm && !bacmp(&bt_sk(sk)->src, src)) - goto found; - sk = NULL; -found: - return sk; -} /* Find socket with psm and source bdaddr. * Returns closest match. @@ -859,70 +848,6 @@ static void l2cap_sock_close(struct sock *sk) l2cap_sock_kill(sk); } -int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) -{ - struct sock *sk = sock->sk; - struct sockaddr_l2 la; - int len, err = 0; - - BT_DBG("sk %p", sk); - - if (!addr || addr->sa_family != AF_BLUETOOTH) - return -EINVAL; - - memset(&la, 0, sizeof(la)); - len = min_t(unsigned int, sizeof(la), alen); - memcpy(&la, addr, len); - - if (la.l2_cid) - return -EINVAL; - - lock_sock(sk); - - if (sk->sk_state != BT_OPEN) { - err = -EBADFD; - goto done; - } - - if (la.l2_psm) { - __u16 psm = __le16_to_cpu(la.l2_psm); - - /* PSM must be odd and lsb of upper byte must be 0 */ - if ((psm & 0x0101) != 0x0001) { - err = -EINVAL; - goto done; - } - - /* Restrict usage of well-known PSMs */ - if (psm < 0x1001 && !capable(CAP_NET_BIND_SERVICE)) { - err = -EACCES; - goto done; - } - } - - write_lock_bh(&l2cap_sk_list.lock); - - if (la.l2_psm && __l2cap_get_sock_by_addr(la.l2_psm, &la.l2_bdaddr)) { - err = -EADDRINUSE; - } else { - /* Save source address */ - bacpy(&bt_sk(sk)->src, 
&la.l2_bdaddr); - l2cap_pi(sk)->psm = la.l2_psm; - l2cap_pi(sk)->sport = la.l2_psm; - sk->sk_state = BT_BOUND; - - if (__le16_to_cpu(la.l2_psm) == 0x0001 || - __le16_to_cpu(la.l2_psm) == 0x0003) - l2cap_pi(sk)->sec_level = BT_SECURITY_SDP; - } - - write_unlock_bh(&l2cap_sk_list.lock); - -done: - release_sock(sk); - return err; -} - static int l2cap_do_connect(struct sock *sk) { bdaddr_t *src = &bt_sk(sk)->src; @@ -1068,65 +993,6 @@ done: return err; } -int l2cap_sock_listen(struct socket *sock, int backlog) -{ - struct sock *sk = sock->sk; - int err = 0; - - BT_DBG("sk %p backlog %d", sk, backlog); - - lock_sock(sk); - - if ((sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM) - || sk->sk_state != BT_BOUND) { - err = -EBADFD; - goto done; - } - - switch (l2cap_pi(sk)->mode) { - case L2CAP_MODE_BASIC: - break; - case L2CAP_MODE_ERTM: - case L2CAP_MODE_STREAMING: - if (!disable_ertm) - break; - /* fall through */ - default: - err = -ENOTSUPP; - goto done; - } - - if (!l2cap_pi(sk)->psm) { - bdaddr_t *src = &bt_sk(sk)->src; - u16 psm; - - err = -EINVAL; - - write_lock_bh(&l2cap_sk_list.lock); - - for (psm = 0x1001; psm < 0x1100; psm += 2) - if (!__l2cap_get_sock_by_addr(cpu_to_le16(psm), src)) { - l2cap_pi(sk)->psm = cpu_to_le16(psm); - l2cap_pi(sk)->sport = cpu_to_le16(psm); - err = 0; - break; - } - - write_unlock_bh(&l2cap_sk_list.lock); - - if (err < 0) - goto done; - } - - sk->sk_max_ack_backlog = backlog; - sk->sk_ack_backlog = 0; - sk->sk_state = BT_LISTEN; - -done: - release_sock(sk); - return err; -} - int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int flags) { DECLARE_WAITQUEUE(wait, current); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 20efd24..ef9a60f 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -62,6 +62,141 @@ static void l2cap_sock_timeout(unsigned long arg) sock_put(sk); } +static struct sock *__l2cap_get_sock_by_addr(__le16 psm, bdaddr_t *src) +{ + struct sock 
*sk; + struct hlist_node *node; + sk_for_each(sk, node, &l2cap_sk_list.head) + if (l2cap_pi(sk)->sport == psm && !bacmp(&bt_sk(sk)->src, src)) + goto found; + sk = NULL; +found: + return sk; +} + +static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) +{ + struct sock *sk = sock->sk; + struct sockaddr_l2 la; + int len, err = 0; + + BT_DBG("sk %p", sk); + + if (!addr || addr->sa_family != AF_BLUETOOTH) + return -EINVAL; + + memset(&la, 0, sizeof(la)); + len = min_t(unsigned int, sizeof(la), alen); + memcpy(&la, addr, len); + + if (la.l2_cid) + return -EINVAL; + + lock_sock(sk); + + if (sk->sk_state != BT_OPEN) { + err = -EBADFD; + goto done; + } + + if (la.l2_psm) { + __u16 psm = __le16_to_cpu(la.l2_psm); + + /* PSM must be odd and lsb of upper byte must be 0 */ + if ((psm & 0x0101) != 0x0001) { + err = -EINVAL; + goto done; + } + + /* Restrict usage of well-known PSMs */ + if (psm < 0x1001 && !capable(CAP_NET_BIND_SERVICE)) { + err = -EACCES; + goto done; + } + } + + write_lock_bh(&l2cap_sk_list.lock); + + if (la.l2_psm && __l2cap_get_sock_by_addr(la.l2_psm, &la.l2_bdaddr)) { + err = -EADDRINUSE; + } else { + /* Save source address */ + bacpy(&bt_sk(sk)->src, &la.l2_bdaddr); + l2cap_pi(sk)->psm = la.l2_psm; + l2cap_pi(sk)->sport = la.l2_psm; + sk->sk_state = BT_BOUND; + + if (__le16_to_cpu(la.l2_psm) == 0x0001 || + __le16_to_cpu(la.l2_psm) == 0x0003) + l2cap_pi(sk)->sec_level = BT_SECURITY_SDP; + } + + write_unlock_bh(&l2cap_sk_list.lock); + +done: + release_sock(sk); + return err; +} + +static int l2cap_sock_listen(struct socket *sock, int backlog) +{ + struct sock *sk = sock->sk; + int err = 0; + + BT_DBG("sk %p backlog %d", sk, backlog); + + lock_sock(sk); + + if ((sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM) + || sk->sk_state != BT_BOUND) { + err = -EBADFD; + goto done; + } + + switch (l2cap_pi(sk)->mode) { + case L2CAP_MODE_BASIC: + break; + case L2CAP_MODE_ERTM: + case L2CAP_MODE_STREAMING: + if (!disable_ertm) + break; 
+ /* fall through */ + default: + err = -ENOTSUPP; + goto done; + } + + if (!l2cap_pi(sk)->psm) { + bdaddr_t *src = &bt_sk(sk)->src; + u16 psm; + + err = -EINVAL; + + write_lock_bh(&l2cap_sk_list.lock); + + for (psm = 0x1001; psm < 0x1100; psm += 2) + if (!__l2cap_get_sock_by_addr(cpu_to_le16(psm), src)) { + l2cap_pi(sk)->psm = cpu_to_le16(psm); + l2cap_pi(sk)->sport = cpu_to_le16(psm); + err = 0; + break; + } + + write_unlock_bh(&l2cap_sk_list.lock); + + if (err < 0) + goto done; + } + + sk->sk_max_ack_backlog = backlog; + sk->sk_ack_backlog = 0; + sk->sk_state = BT_LISTEN; + +done: + release_sock(sk); + return err; +} + static int l2cap_sock_release(struct socket *sock) { struct sock *sk = sock->sk; -- cgit v1.1 From c47b7c724bc7106acf602b2ce99922a2d14ea62b Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Fri, 4 Feb 2011 02:42:23 -0200 Subject: Bluetooth: move l2cap_sock_accept() to l2cap_sock.c Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_core.c | 56 ---------------------------------------------- net/bluetooth/l2cap_sock.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 56 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 6af3872..ff6a54f 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -993,62 +993,6 @@ done: return err; } -int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int flags) -{ - DECLARE_WAITQUEUE(wait, current); - struct sock *sk = sock->sk, *nsk; - long timeo; - int err = 0; - - lock_sock_nested(sk, SINGLE_DEPTH_NESTING); - - if (sk->sk_state != BT_LISTEN) { - err = -EBADFD; - goto done; - } - - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); - - BT_DBG("sk %p timeo %ld", sk, timeo); - - /* Wait for an incoming connection. (wake-one). 
*/ - add_wait_queue_exclusive(sk_sleep(sk), &wait); - while (!(nsk = bt_accept_dequeue(sk, newsock))) { - set_current_state(TASK_INTERRUPTIBLE); - if (!timeo) { - err = -EAGAIN; - break; - } - - release_sock(sk); - timeo = schedule_timeout(timeo); - lock_sock_nested(sk, SINGLE_DEPTH_NESTING); - - if (sk->sk_state != BT_LISTEN) { - err = -EBADFD; - break; - } - - if (signal_pending(current)) { - err = sock_intr_errno(timeo); - break; - } - } - set_current_state(TASK_RUNNING); - remove_wait_queue(sk_sleep(sk), &wait); - - if (err) - goto done; - - newsock->state = SS_CONNECTED; - - BT_DBG("new socket %p", nsk); - -done: - release_sock(sk); - return err; -} - int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer) { struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index ef9a60f..b19a386 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -197,6 +197,62 @@ done: return err; } +static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int flags) +{ + DECLARE_WAITQUEUE(wait, current); + struct sock *sk = sock->sk, *nsk; + long timeo; + int err = 0; + + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + + if (sk->sk_state != BT_LISTEN) { + err = -EBADFD; + goto done; + } + + timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + + BT_DBG("sk %p timeo %ld", sk, timeo); + + /* Wait for an incoming connection. (wake-one). 
*/ + add_wait_queue_exclusive(sk_sleep(sk), &wait); + while (!(nsk = bt_accept_dequeue(sk, newsock))) { + set_current_state(TASK_INTERRUPTIBLE); + if (!timeo) { + err = -EAGAIN; + break; + } + + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + + if (sk->sk_state != BT_LISTEN) { + err = -EBADFD; + break; + } + + if (signal_pending(current)) { + err = sock_intr_errno(timeo); + break; + } + } + set_current_state(TASK_RUNNING); + remove_wait_queue(sk_sleep(sk), &wait); + + if (err) + goto done; + + newsock->state = SS_CONNECTED; + + BT_DBG("new socket %p", nsk); + +done: + release_sock(sk); + return err; +} + static int l2cap_sock_release(struct socket *sock) { struct sock *sk = sock->sk; -- cgit v1.1 From d7175d55255cb0a576844bc6e986000e0d7f8e9d Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Fri, 4 Feb 2011 02:43:46 -0200 Subject: Bluetooth: move l2cap_sock_getname() to l2cap_sock.c Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_core.c | 23 ----------------------- net/bluetooth/l2cap_sock.c | 23 +++++++++++++++++++++++ 2 files changed, 23 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index ff6a54f..bd46cac 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -993,29 +993,6 @@ done: return err; } -int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer) -{ - struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr; - struct sock *sk = sock->sk; - - BT_DBG("sock %p, sk %p", sock, sk); - - addr->sa_family = AF_BLUETOOTH; - *len = sizeof(struct sockaddr_l2); - - if (peer) { - la->l2_psm = l2cap_pi(sk)->psm; - bacpy(&la->l2_bdaddr, &bt_sk(sk)->dst); - la->l2_cid = cpu_to_le16(l2cap_pi(sk)->dcid); - } else { - la->l2_psm = l2cap_pi(sk)->sport; - bacpy(&la->l2_bdaddr, &bt_sk(sk)->src); - la->l2_cid = cpu_to_le16(l2cap_pi(sk)->scid); - } - - return 0; -} - static int 
__l2cap_wait_ack(struct sock *sk) { DECLARE_WAITQUEUE(wait, current); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index b19a386..4c13f8b 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -253,6 +253,29 @@ done: return err; } +static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer) +{ + struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr; + struct sock *sk = sock->sk; + + BT_DBG("sock %p, sk %p", sock, sk); + + addr->sa_family = AF_BLUETOOTH; + *len = sizeof(struct sockaddr_l2); + + if (peer) { + la->l2_psm = l2cap_pi(sk)->psm; + bacpy(&la->l2_bdaddr, &bt_sk(sk)->dst); + la->l2_cid = cpu_to_le16(l2cap_pi(sk)->dcid); + } else { + la->l2_psm = l2cap_pi(sk)->sport; + bacpy(&la->l2_bdaddr, &bt_sk(sk)->src); + la->l2_cid = cpu_to_le16(l2cap_pi(sk)->scid); + } + + return 0; +} + static int l2cap_sock_release(struct socket *sock) { struct sock *sk = sock->sk; -- cgit v1.1 From 33575df7be6748292f88453f29319af6d639c5c8 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Fri, 4 Feb 2011 02:48:48 -0200 Subject: Bluetooth: move l2cap_sock_setsockopt() to l2cap_sock.c Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/l2cap_core.c | 174 -------------------------------------------- net/bluetooth/l2cap_sock.c | 175 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 175 insertions(+), 174 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index bd46cac..9d35caf 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1596,180 +1596,6 @@ int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m return bt_sock_recvmsg(iocb, sock, msg, len, flags); } -static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, unsigned int optlen) -{ - struct sock *sk = sock->sk; - struct l2cap_options opts; - int len, err = 0; - u32 opt; - - BT_DBG("sk %p", sk); - - lock_sock(sk); - - switch (optname) { - case L2CAP_OPTIONS: - if (sk->sk_state == BT_CONNECTED) { - err = -EINVAL; - break; - } - - opts.imtu = l2cap_pi(sk)->imtu; - opts.omtu = l2cap_pi(sk)->omtu; - opts.flush_to = l2cap_pi(sk)->flush_to; - opts.mode = l2cap_pi(sk)->mode; - opts.fcs = l2cap_pi(sk)->fcs; - opts.max_tx = l2cap_pi(sk)->max_tx; - opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win; - - len = min_t(unsigned int, sizeof(opts), optlen); - if (copy_from_user((char *) &opts, optval, len)) { - err = -EFAULT; - break; - } - - if (opts.txwin_size > L2CAP_DEFAULT_TX_WINDOW) { - err = -EINVAL; - break; - } - - l2cap_pi(sk)->mode = opts.mode; - switch (l2cap_pi(sk)->mode) { - case L2CAP_MODE_BASIC: - l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_STATE2_DEVICE; - break; - case L2CAP_MODE_ERTM: - case L2CAP_MODE_STREAMING: - if (!disable_ertm) - break; - /* fall through */ - default: - err = -EINVAL; - break; - } - - l2cap_pi(sk)->imtu = opts.imtu; - l2cap_pi(sk)->omtu = opts.omtu; - l2cap_pi(sk)->fcs = opts.fcs; - l2cap_pi(sk)->max_tx = opts.max_tx; - l2cap_pi(sk)->tx_win = (__u8)opts.txwin_size; - break; - - case L2CAP_LM: - if (get_user(opt, (u32 __user *) optval)) { - err = -EFAULT; - 
break; - } - - if (opt & L2CAP_LM_AUTH) - l2cap_pi(sk)->sec_level = BT_SECURITY_LOW; - if (opt & L2CAP_LM_ENCRYPT) - l2cap_pi(sk)->sec_level = BT_SECURITY_MEDIUM; - if (opt & L2CAP_LM_SECURE) - l2cap_pi(sk)->sec_level = BT_SECURITY_HIGH; - - l2cap_pi(sk)->role_switch = (opt & L2CAP_LM_MASTER); - l2cap_pi(sk)->force_reliable = (opt & L2CAP_LM_RELIABLE); - break; - - default: - err = -ENOPROTOOPT; - break; - } - - release_sock(sk); - return err; -} - -int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) -{ - struct sock *sk = sock->sk; - struct bt_security sec; - int len, err = 0; - u32 opt; - - BT_DBG("sk %p", sk); - - if (level == SOL_L2CAP) - return l2cap_sock_setsockopt_old(sock, optname, optval, optlen); - - if (level != SOL_BLUETOOTH) - return -ENOPROTOOPT; - - lock_sock(sk); - - switch (optname) { - case BT_SECURITY: - if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM - && sk->sk_type != SOCK_RAW) { - err = -EINVAL; - break; - } - - sec.level = BT_SECURITY_LOW; - - len = min_t(unsigned int, sizeof(sec), optlen); - if (copy_from_user((char *) &sec, optval, len)) { - err = -EFAULT; - break; - } - - if (sec.level < BT_SECURITY_LOW || - sec.level > BT_SECURITY_HIGH) { - err = -EINVAL; - break; - } - - l2cap_pi(sk)->sec_level = sec.level; - break; - - case BT_DEFER_SETUP: - if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { - err = -EINVAL; - break; - } - - if (get_user(opt, (u32 __user *) optval)) { - err = -EFAULT; - break; - } - - bt_sk(sk)->defer_setup = opt; - break; - - case BT_FLUSHABLE: - if (get_user(opt, (u32 __user *) optval)) { - err = -EFAULT; - break; - } - - if (opt > BT_FLUSHABLE_ON) { - err = -EINVAL; - break; - } - - if (opt == BT_FLUSHABLE_OFF) { - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - /* proceed futher only when we have l2cap_conn and - No Flush support in the LM */ - if (!conn || !lmp_no_flush_capable(conn->hcon->hdev)) { - err = -EINVAL; - break; - } 
- } - - l2cap_pi(sk)->flushable = opt; - break; - - default: - err = -ENOPROTOOPT; - break; - } - - release_sock(sk); - return err; -} - static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 4c13f8b..1bbe8a0 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -27,6 +27,7 @@ /* Bluetooth L2CAP sockets. */ #include +#include #include static void l2cap_sock_timeout(unsigned long arg) @@ -276,6 +277,180 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l return 0; } +static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, unsigned int optlen) +{ + struct sock *sk = sock->sk; + struct l2cap_options opts; + int len, err = 0; + u32 opt; + + BT_DBG("sk %p", sk); + + lock_sock(sk); + + switch (optname) { + case L2CAP_OPTIONS: + if (sk->sk_state == BT_CONNECTED) { + err = -EINVAL; + break; + } + + opts.imtu = l2cap_pi(sk)->imtu; + opts.omtu = l2cap_pi(sk)->omtu; + opts.flush_to = l2cap_pi(sk)->flush_to; + opts.mode = l2cap_pi(sk)->mode; + opts.fcs = l2cap_pi(sk)->fcs; + opts.max_tx = l2cap_pi(sk)->max_tx; + opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win; + + len = min_t(unsigned int, sizeof(opts), optlen); + if (copy_from_user((char *) &opts, optval, len)) { + err = -EFAULT; + break; + } + + if (opts.txwin_size > L2CAP_DEFAULT_TX_WINDOW) { + err = -EINVAL; + break; + } + + l2cap_pi(sk)->mode = opts.mode; + switch (l2cap_pi(sk)->mode) { + case L2CAP_MODE_BASIC: + l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_STATE2_DEVICE; + break; + case L2CAP_MODE_ERTM: + case L2CAP_MODE_STREAMING: + if (!disable_ertm) + break; + /* fall through */ + default: + err = -EINVAL; + break; + } + + l2cap_pi(sk)->imtu = opts.imtu; + l2cap_pi(sk)->omtu = opts.omtu; + l2cap_pi(sk)->fcs = opts.fcs; + l2cap_pi(sk)->max_tx = opts.max_tx; + 
l2cap_pi(sk)->tx_win = (__u8)opts.txwin_size; + break; + + case L2CAP_LM: + if (get_user(opt, (u32 __user *) optval)) { + err = -EFAULT; + break; + } + + if (opt & L2CAP_LM_AUTH) + l2cap_pi(sk)->sec_level = BT_SECURITY_LOW; + if (opt & L2CAP_LM_ENCRYPT) + l2cap_pi(sk)->sec_level = BT_SECURITY_MEDIUM; + if (opt & L2CAP_LM_SECURE) + l2cap_pi(sk)->sec_level = BT_SECURITY_HIGH; + + l2cap_pi(sk)->role_switch = (opt & L2CAP_LM_MASTER); + l2cap_pi(sk)->force_reliable = (opt & L2CAP_LM_RELIABLE); + break; + + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; +} + +static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) +{ + struct sock *sk = sock->sk; + struct bt_security sec; + int len, err = 0; + u32 opt; + + BT_DBG("sk %p", sk); + + if (level == SOL_L2CAP) + return l2cap_sock_setsockopt_old(sock, optname, optval, optlen); + + if (level != SOL_BLUETOOTH) + return -ENOPROTOOPT; + + lock_sock(sk); + + switch (optname) { + case BT_SECURITY: + if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM + && sk->sk_type != SOCK_RAW) { + err = -EINVAL; + break; + } + + sec.level = BT_SECURITY_LOW; + + len = min_t(unsigned int, sizeof(sec), optlen); + if (copy_from_user((char *) &sec, optval, len)) { + err = -EFAULT; + break; + } + + if (sec.level < BT_SECURITY_LOW || + sec.level > BT_SECURITY_HIGH) { + err = -EINVAL; + break; + } + + l2cap_pi(sk)->sec_level = sec.level; + break; + + case BT_DEFER_SETUP: + if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { + err = -EINVAL; + break; + } + + if (get_user(opt, (u32 __user *) optval)) { + err = -EFAULT; + break; + } + + bt_sk(sk)->defer_setup = opt; + break; + + case BT_FLUSHABLE: + if (get_user(opt, (u32 __user *) optval)) { + err = -EFAULT; + break; + } + + if (opt > BT_FLUSHABLE_ON) { + err = -EINVAL; + break; + } + + if (opt == BT_FLUSHABLE_OFF) { + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + /* proceed futher only 
when we have l2cap_conn and + No Flush support in the LM */ + if (!conn || !lmp_no_flush_capable(conn->hcon->hdev)) { + err = -EINVAL; + break; + } + } + + l2cap_pi(sk)->flushable = opt; + break; + + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; +} + static int l2cap_sock_release(struct socket *sock) { struct sock *sk = sock->sk; -- cgit v1.1 From 99f4808db0c052f3c92a689ec2841618bf2ce14a Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Fri, 4 Feb 2011 02:52:55 -0200 Subject: Bluetooth: move l2cap_sock_getsockopt() to l2cap_sock.c Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_core.c | 145 --------------------------------------------- net/bluetooth/l2cap_sock.c | 145 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+), 145 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 9d35caf..8e015d9 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1596,151 +1596,6 @@ int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m return bt_sock_recvmsg(iocb, sock, msg, len, flags); } -static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen) -{ - struct sock *sk = sock->sk; - struct l2cap_options opts; - struct l2cap_conninfo cinfo; - int len, err = 0; - u32 opt; - - BT_DBG("sk %p", sk); - - if (get_user(len, optlen)) - return -EFAULT; - - lock_sock(sk); - - switch (optname) { - case L2CAP_OPTIONS: - opts.imtu = l2cap_pi(sk)->imtu; - opts.omtu = l2cap_pi(sk)->omtu; - opts.flush_to = l2cap_pi(sk)->flush_to; - opts.mode = l2cap_pi(sk)->mode; - opts.fcs = l2cap_pi(sk)->fcs; - opts.max_tx = l2cap_pi(sk)->max_tx; - opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win; - - len = min_t(unsigned int, len, sizeof(opts)); - if (copy_to_user(optval, (char *) &opts, len)) - err = -EFAULT; - - break; - - case L2CAP_LM: - switch (l2cap_pi(sk)->sec_level) { - case 
BT_SECURITY_LOW: - opt = L2CAP_LM_AUTH; - break; - case BT_SECURITY_MEDIUM: - opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT; - break; - case BT_SECURITY_HIGH: - opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT | - L2CAP_LM_SECURE; - break; - default: - opt = 0; - break; - } - - if (l2cap_pi(sk)->role_switch) - opt |= L2CAP_LM_MASTER; - - if (l2cap_pi(sk)->force_reliable) - opt |= L2CAP_LM_RELIABLE; - - if (put_user(opt, (u32 __user *) optval)) - err = -EFAULT; - break; - - case L2CAP_CONNINFO: - if (sk->sk_state != BT_CONNECTED && - !(sk->sk_state == BT_CONNECT2 && - bt_sk(sk)->defer_setup)) { - err = -ENOTCONN; - break; - } - - cinfo.hci_handle = l2cap_pi(sk)->conn->hcon->handle; - memcpy(cinfo.dev_class, l2cap_pi(sk)->conn->hcon->dev_class, 3); - - len = min_t(unsigned int, len, sizeof(cinfo)); - if (copy_to_user(optval, (char *) &cinfo, len)) - err = -EFAULT; - - break; - - default: - err = -ENOPROTOOPT; - break; - } - - release_sock(sk); - return err; -} - -int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) -{ - struct sock *sk = sock->sk; - struct bt_security sec; - int len, err = 0; - - BT_DBG("sk %p", sk); - - if (level == SOL_L2CAP) - return l2cap_sock_getsockopt_old(sock, optname, optval, optlen); - - if (level != SOL_BLUETOOTH) - return -ENOPROTOOPT; - - if (get_user(len, optlen)) - return -EFAULT; - - lock_sock(sk); - - switch (optname) { - case BT_SECURITY: - if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM - && sk->sk_type != SOCK_RAW) { - err = -EINVAL; - break; - } - - sec.level = l2cap_pi(sk)->sec_level; - - len = min_t(unsigned int, len, sizeof(sec)); - if (copy_to_user(optval, (char *) &sec, len)) - err = -EFAULT; - - break; - - case BT_DEFER_SETUP: - if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { - err = -EINVAL; - break; - } - - if (put_user(bt_sk(sk)->defer_setup, (u32 __user *) optval)) - err = -EFAULT; - - break; - - case BT_FLUSHABLE: - if 
(put_user(l2cap_pi(sk)->flushable, (u32 __user *) optval)) - err = -EFAULT; - - break; - - default: - err = -ENOPROTOOPT; - break; - } - - release_sock(sk); - return err; -} - int l2cap_sock_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 1bbe8a0..b7d5ae9 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -277,6 +277,151 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l return 0; } +static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + struct l2cap_options opts; + struct l2cap_conninfo cinfo; + int len, err = 0; + u32 opt; + + BT_DBG("sk %p", sk); + + if (get_user(len, optlen)) + return -EFAULT; + + lock_sock(sk); + + switch (optname) { + case L2CAP_OPTIONS: + opts.imtu = l2cap_pi(sk)->imtu; + opts.omtu = l2cap_pi(sk)->omtu; + opts.flush_to = l2cap_pi(sk)->flush_to; + opts.mode = l2cap_pi(sk)->mode; + opts.fcs = l2cap_pi(sk)->fcs; + opts.max_tx = l2cap_pi(sk)->max_tx; + opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win; + + len = min_t(unsigned int, len, sizeof(opts)); + if (copy_to_user(optval, (char *) &opts, len)) + err = -EFAULT; + + break; + + case L2CAP_LM: + switch (l2cap_pi(sk)->sec_level) { + case BT_SECURITY_LOW: + opt = L2CAP_LM_AUTH; + break; + case BT_SECURITY_MEDIUM: + opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT; + break; + case BT_SECURITY_HIGH: + opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT | + L2CAP_LM_SECURE; + break; + default: + opt = 0; + break; + } + + if (l2cap_pi(sk)->role_switch) + opt |= L2CAP_LM_MASTER; + + if (l2cap_pi(sk)->force_reliable) + opt |= L2CAP_LM_RELIABLE; + + if (put_user(opt, (u32 __user *) optval)) + err = -EFAULT; + break; + + case L2CAP_CONNINFO: + if (sk->sk_state != BT_CONNECTED && + !(sk->sk_state == BT_CONNECT2 && + bt_sk(sk)->defer_setup)) { + err = -ENOTCONN; + break; + } + + 
cinfo.hci_handle = l2cap_pi(sk)->conn->hcon->handle; + memcpy(cinfo.dev_class, l2cap_pi(sk)->conn->hcon->dev_class, 3); + + len = min_t(unsigned int, len, sizeof(cinfo)); + if (copy_to_user(optval, (char *) &cinfo, len)) + err = -EFAULT; + + break; + + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; +} + +static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + struct bt_security sec; + int len, err = 0; + + BT_DBG("sk %p", sk); + + if (level == SOL_L2CAP) + return l2cap_sock_getsockopt_old(sock, optname, optval, optlen); + + if (level != SOL_BLUETOOTH) + return -ENOPROTOOPT; + + if (get_user(len, optlen)) + return -EFAULT; + + lock_sock(sk); + + switch (optname) { + case BT_SECURITY: + if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM + && sk->sk_type != SOCK_RAW) { + err = -EINVAL; + break; + } + + sec.level = l2cap_pi(sk)->sec_level; + + len = min_t(unsigned int, len, sizeof(sec)); + if (copy_to_user(optval, (char *) &sec, len)) + err = -EFAULT; + + break; + + case BT_DEFER_SETUP: + if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { + err = -EINVAL; + break; + } + + if (put_user(bt_sk(sk)->defer_setup, (u32 __user *) optval)) + err = -EFAULT; + + break; + + case BT_FLUSHABLE: + if (put_user(l2cap_pi(sk)->flushable, (u32 __user *) optval)) + err = -EFAULT; + + break; + + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; +} + static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, unsigned int optlen) { struct sock *sk = sock->sk; -- cgit v1.1 From 4e34c50bfe5ba87da1622cc7c6ed10712da255ad Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Fri, 4 Feb 2011 02:56:13 -0200 Subject: Bluetooth: move l2cap_sock_connect() to l2cap_sock.c Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/l2cap_core.c | 87 +--------------------------------------------- net/bluetooth/l2cap_sock.c | 85 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 86 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 8e015d9..9732745 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -848,7 +848,7 @@ static void l2cap_sock_close(struct sock *sk) l2cap_sock_kill(sk); } -static int l2cap_do_connect(struct sock *sk) +int l2cap_do_connect(struct sock *sk) { bdaddr_t *src = &bt_sk(sk)->src; bdaddr_t *dst = &bt_sk(sk)->dst; @@ -908,91 +908,6 @@ done: return err; } -int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) -{ - struct sock *sk = sock->sk; - struct sockaddr_l2 la; - int len, err = 0; - - BT_DBG("sk %p", sk); - - if (!addr || alen < sizeof(addr->sa_family) || - addr->sa_family != AF_BLUETOOTH) - return -EINVAL; - - memset(&la, 0, sizeof(la)); - len = min_t(unsigned int, sizeof(la), alen); - memcpy(&la, addr, len); - - if (la.l2_cid) - return -EINVAL; - - lock_sock(sk); - - if ((sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM) - && !la.l2_psm) { - err = -EINVAL; - goto done; - } - - switch (l2cap_pi(sk)->mode) { - case L2CAP_MODE_BASIC: - break; - case L2CAP_MODE_ERTM: - case L2CAP_MODE_STREAMING: - if (!disable_ertm) - break; - /* fall through */ - default: - err = -ENOTSUPP; - goto done; - } - - switch (sk->sk_state) { - case BT_CONNECT: - case BT_CONNECT2: - case BT_CONFIG: - /* Already connecting */ - goto wait; - - case BT_CONNECTED: - /* Already connected */ - err = -EISCONN; - goto done; - - case BT_OPEN: - case BT_BOUND: - /* Can connect */ - break; - - default: - err = -EBADFD; - goto done; - } - - /* PSM must be odd and lsb of upper byte must be 0 */ - if ((__le16_to_cpu(la.l2_psm) & 0x0101) != 0x0001 && - sk->sk_type != SOCK_RAW) { - err = -EINVAL; - goto done; - } - - /* Set destination 
address and psm */ - bacpy(&bt_sk(sk)->dst, &la.l2_bdaddr); - l2cap_pi(sk)->psm = la.l2_psm; - - err = l2cap_do_connect(sk); - if (err) - goto done; - -wait: - err = bt_sock_wait_state(sk, BT_CONNECTED, - sock_sndtimeo(sk, flags & O_NONBLOCK)); -done: - release_sock(sk); - return err; -} - static int __l2cap_wait_ack(struct sock *sk) { DECLARE_WAITQUEUE(wait, current); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index b7d5ae9..e2f14f1 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -139,6 +139,91 @@ done: return err; } +static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) +{ + struct sock *sk = sock->sk; + struct sockaddr_l2 la; + int len, err = 0; + + BT_DBG("sk %p", sk); + + if (!addr || alen < sizeof(addr->sa_family) || + addr->sa_family != AF_BLUETOOTH) + return -EINVAL; + + memset(&la, 0, sizeof(la)); + len = min_t(unsigned int, sizeof(la), alen); + memcpy(&la, addr, len); + + if (la.l2_cid) + return -EINVAL; + + lock_sock(sk); + + if ((sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM) + && !la.l2_psm) { + err = -EINVAL; + goto done; + } + + switch (l2cap_pi(sk)->mode) { + case L2CAP_MODE_BASIC: + break; + case L2CAP_MODE_ERTM: + case L2CAP_MODE_STREAMING: + if (!disable_ertm) + break; + /* fall through */ + default: + err = -ENOTSUPP; + goto done; + } + + switch (sk->sk_state) { + case BT_CONNECT: + case BT_CONNECT2: + case BT_CONFIG: + /* Already connecting */ + goto wait; + + case BT_CONNECTED: + /* Already connected */ + err = -EISCONN; + goto done; + + case BT_OPEN: + case BT_BOUND: + /* Can connect */ + break; + + default: + err = -EBADFD; + goto done; + } + + /* PSM must be odd and lsb of upper byte must be 0 */ + if ((__le16_to_cpu(la.l2_psm) & 0x0101) != 0x0001 && + sk->sk_type != SOCK_RAW) { + err = -EINVAL; + goto done; + } + + /* Set destination address and psm */ + bacpy(&bt_sk(sk)->dst, &la.l2_bdaddr); + l2cap_pi(sk)->psm = la.l2_psm; + + err 
= l2cap_do_connect(sk); + if (err) + goto done; + +wait: + err = bt_sock_wait_state(sk, BT_CONNECTED, + sock_sndtimeo(sk, flags & O_NONBLOCK)); +done: + release_sock(sk); + return err; +} + static int l2cap_sock_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; -- cgit v1.1 From 6898325923f9571fbede3372dc490faa43b3258a Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Fri, 4 Feb 2011 03:02:31 -0200 Subject: Bluetooth: move l2cap_sock_recvmsg() to l2cap_sock.c It causes the move of the declaration of 3 functions to l2cap.h: l2cap_get_ident(), l2cap_send_cmd(), l2cap_build_conf_req() Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_core.c | 49 +++------------------------------------------- net/bluetooth/l2cap_sock.c | 42 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 46 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 9732745..3a0e42b 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -72,7 +72,6 @@ static void l2cap_busy_work(struct work_struct *work); static void l2cap_sock_close(struct sock *sk); -static int l2cap_build_conf_req(struct sock *sk, void *data); static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, u8 code, u8 ident, u16 dlen, void *data); @@ -311,7 +310,7 @@ static inline int l2cap_check_security(struct sock *sk) auth_type); } -static inline u8 l2cap_get_ident(struct l2cap_conn *conn) +u8 l2cap_get_ident(struct l2cap_conn *conn) { u8 id; @@ -333,7 +332,7 @@ static inline u8 l2cap_get_ident(struct l2cap_conn *conn) return id; } -static inline void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data) +void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data) { struct sk_buff *skb = l2cap_build_cmd(conn, code, ident, len, data); u8 flags; @@ -1469,48 +1468,6 @@ done: return err; } -int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, 
struct msghdr *msg, size_t len, int flags) -{ - struct sock *sk = sock->sk; - - lock_sock(sk); - - if (sk->sk_state == BT_CONNECT2 && bt_sk(sk)->defer_setup) { - struct l2cap_conn_rsp rsp; - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - u8 buf[128]; - - sk->sk_state = BT_CONFIG; - - rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); - rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); - rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS); - rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); - l2cap_send_cmd(l2cap_pi(sk)->conn, l2cap_pi(sk)->ident, - L2CAP_CONN_RSP, sizeof(rsp), &rsp); - - if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) { - release_sock(sk); - return 0; - } - - l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; - l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, - l2cap_build_conf_req(sk, buf), buf); - l2cap_pi(sk)->num_conf_req++; - - release_sock(sk); - return 0; - } - - release_sock(sk); - - if (sock->type == SOCK_STREAM) - return bt_sock_stream_recvmsg(iocb, sock, msg, len, flags); - - return bt_sock_recvmsg(iocb, sock, msg, len, flags); -} - int l2cap_sock_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; @@ -1760,7 +1717,7 @@ static inline __u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask) } } -static int l2cap_build_conf_req(struct sock *sk, void *data) +int l2cap_build_conf_req(struct sock *sk, void *data) { struct l2cap_pinfo *pi = l2cap_pi(sk); struct l2cap_conf_req *req = data; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index e2f14f1..fa2bc5d 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -681,6 +681,48 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch return err; } +static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags) +{ + struct sock *sk = sock->sk; + + lock_sock(sk); + + if (sk->sk_state == BT_CONNECT2 && bt_sk(sk)->defer_setup) { + struct l2cap_conn_rsp rsp; + struct l2cap_conn 
*conn = l2cap_pi(sk)->conn; + u8 buf[128]; + + sk->sk_state = BT_CONFIG; + + rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); + rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); + rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS); + rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); + l2cap_send_cmd(l2cap_pi(sk)->conn, l2cap_pi(sk)->ident, + L2CAP_CONN_RSP, sizeof(rsp), &rsp); + + if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) { + release_sock(sk); + return 0; + } + + l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; + l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, + l2cap_build_conf_req(sk, buf), buf); + l2cap_pi(sk)->num_conf_req++; + + release_sock(sk); + return 0; + } + + release_sock(sk); + + if (sock->type == SOCK_STREAM) + return bt_sock_stream_recvmsg(iocb, sock, msg, len, flags); + + return bt_sock_recvmsg(iocb, sock, msg, len, flags); +} + static int l2cap_sock_release(struct socket *sock) { struct sock *sk = sock->sk; -- cgit v1.1 From dcba0dba54b566a08376f93cab35cdabd6abda20 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Fri, 4 Feb 2011 03:08:36 -0200 Subject: Bluetooth: move l2cap_sock_shutdown() to l2cap_sock.c Declare __l2cap_wait_ack() and l2cap_sock_clear_timer() in l2cap.h Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/l2cap_core.c | 35 ++--------------------------------- net/bluetooth/l2cap_sock.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 3a0e42b..6e48e58 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -84,7 +84,7 @@ void l2cap_sock_set_timer(struct sock *sk, long timeout) sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout); } -static void l2cap_sock_clear_timer(struct sock *sk) +void l2cap_sock_clear_timer(struct sock *sk) { BT_DBG("sock %p state %d", sk, sk->sk_state); sk_stop_timer(sk, &sk->sk_timer); @@ -907,7 +907,7 @@ done: return err; } -static int __l2cap_wait_ack(struct sock *sk) +int __l2cap_wait_ack(struct sock *sk) { DECLARE_WAITQUEUE(wait, current); int err = 0; @@ -1468,37 +1468,6 @@ done: return err; } -int l2cap_sock_shutdown(struct socket *sock, int how) -{ - struct sock *sk = sock->sk; - int err = 0; - - BT_DBG("sock %p, sk %p", sock, sk); - - if (!sk) - return 0; - - lock_sock(sk); - if (!sk->sk_shutdown) { - if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) - err = __l2cap_wait_ack(sk); - - sk->sk_shutdown = SHUTDOWN_MASK; - l2cap_sock_clear_timer(sk); - __l2cap_sock_close(sk, 0); - - if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) - err = bt_sock_wait_state(sk, BT_CLOSED, - sk->sk_lingertime); - } - - if (!err && sk->sk_err) - err = -sk->sk_err; - - release_sock(sk); - return err; -} - static void l2cap_chan_ready(struct sock *sk) { struct sock *parent = bt_sk(sk)->parent; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index fa2bc5d..93af233 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -723,6 +723,37 @@ static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct ms return bt_sock_recvmsg(iocb, sock, msg, len, flags); } +static int l2cap_sock_shutdown(struct socket *sock, int how) +{ + struct sock 
*sk = sock->sk; + int err = 0; + + BT_DBG("sock %p, sk %p", sock, sk); + + if (!sk) + return 0; + + lock_sock(sk); + if (!sk->sk_shutdown) { + if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) + err = __l2cap_wait_ack(sk); + + sk->sk_shutdown = SHUTDOWN_MASK; + l2cap_sock_clear_timer(sk); + __l2cap_sock_close(sk, 0); + + if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) + err = bt_sock_wait_state(sk, BT_CLOSED, + sk->sk_lingertime); + } + + if (!err && sk->sk_err) + err = -sk->sk_err; + + release_sock(sk); + return err; +} + static int l2cap_sock_release(struct socket *sock) { struct sock *sk = sock->sk; -- cgit v1.1 From fd83ccdb393e3190633e0240dd73faac8998164b Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Fri, 4 Feb 2011 03:20:52 -0200 Subject: Bluetooth: move l2cap_sock_sendmsg() to l2cap_sock.c Also moves some L2CAP sending functions declaration to l2cap.h Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_core.c | 116 +++------------------------------------------ net/bluetooth/l2cap_sock.c | 102 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+), 109 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 6e48e58..da9b3a4 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -993,7 +993,7 @@ static void l2cap_drop_acked_frames(struct sock *sk) del_timer(&l2cap_pi(sk)->retrans_timer); } -static inline void l2cap_do_send(struct sock *sk, struct sk_buff *skb) +void l2cap_do_send(struct sock *sk, struct sk_buff *skb) { struct l2cap_pinfo *pi = l2cap_pi(sk); struct hci_conn *hcon = pi->conn->hcon; @@ -1009,7 +1009,7 @@ static inline void l2cap_do_send(struct sock *sk, struct sk_buff *skb) hci_send_acl(hcon, skb, flags); } -static void l2cap_streaming_send(struct sock *sk) +void l2cap_streaming_send(struct sock *sk) { struct sk_buff *skb; struct l2cap_pinfo *pi = l2cap_pi(sk); @@ -1078,7 +1078,7 @@ static void l2cap_retransmit_one_frame(struct sock *sk, u8 
tx_seq) l2cap_do_send(sk, tx_skb); } -static int l2cap_ertm_send(struct sock *sk) +int l2cap_ertm_send(struct sock *sk) { struct sk_buff *skb, *tx_skb; struct l2cap_pinfo *pi = l2cap_pi(sk); @@ -1218,7 +1218,7 @@ static inline int l2cap_skbuff_fromiovec(struct sock *sk, struct msghdr *msg, in return sent; } -static struct sk_buff *l2cap_create_connless_pdu(struct sock *sk, struct msghdr *msg, size_t len) +struct sk_buff *l2cap_create_connless_pdu(struct sock *sk, struct msghdr *msg, size_t len) { struct l2cap_conn *conn = l2cap_pi(sk)->conn; struct sk_buff *skb; @@ -1247,7 +1247,7 @@ static struct sk_buff *l2cap_create_connless_pdu(struct sock *sk, struct msghdr return skb; } -static struct sk_buff *l2cap_create_basic_pdu(struct sock *sk, struct msghdr *msg, size_t len) +struct sk_buff *l2cap_create_basic_pdu(struct sock *sk, struct msghdr *msg, size_t len) { struct l2cap_conn *conn = l2cap_pi(sk)->conn; struct sk_buff *skb; @@ -1275,7 +1275,7 @@ static struct sk_buff *l2cap_create_basic_pdu(struct sock *sk, struct msghdr *ms return skb; } -static struct sk_buff *l2cap_create_iframe_pdu(struct sock *sk, struct msghdr *msg, size_t len, u16 control, u16 sdulen) +struct sk_buff *l2cap_create_iframe_pdu(struct sock *sk, struct msghdr *msg, size_t len, u16 control, u16 sdulen) { struct l2cap_conn *conn = l2cap_pi(sk)->conn; struct sk_buff *skb; @@ -1320,7 +1320,7 @@ static struct sk_buff *l2cap_create_iframe_pdu(struct sock *sk, struct msghdr *m return skb; } -static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, size_t len) +int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, size_t len) { struct l2cap_pinfo *pi = l2cap_pi(sk); struct sk_buff *skb; @@ -1366,108 +1366,6 @@ static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, siz return size; } -int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) -{ - struct sock *sk = sock->sk; - struct l2cap_pinfo *pi = l2cap_pi(sk); - 
struct sk_buff *skb; - u16 control; - int err; - - BT_DBG("sock %p, sk %p", sock, sk); - - err = sock_error(sk); - if (err) - return err; - - if (msg->msg_flags & MSG_OOB) - return -EOPNOTSUPP; - - lock_sock(sk); - - if (sk->sk_state != BT_CONNECTED) { - err = -ENOTCONN; - goto done; - } - - /* Connectionless channel */ - if (sk->sk_type == SOCK_DGRAM) { - skb = l2cap_create_connless_pdu(sk, msg, len); - if (IS_ERR(skb)) { - err = PTR_ERR(skb); - } else { - l2cap_do_send(sk, skb); - err = len; - } - goto done; - } - - switch (pi->mode) { - case L2CAP_MODE_BASIC: - /* Check outgoing MTU */ - if (len > pi->omtu) { - err = -EMSGSIZE; - goto done; - } - - /* Create a basic PDU */ - skb = l2cap_create_basic_pdu(sk, msg, len); - if (IS_ERR(skb)) { - err = PTR_ERR(skb); - goto done; - } - - l2cap_do_send(sk, skb); - err = len; - break; - - case L2CAP_MODE_ERTM: - case L2CAP_MODE_STREAMING: - /* Entire SDU fits into one PDU */ - if (len <= pi->remote_mps) { - control = L2CAP_SDU_UNSEGMENTED; - skb = l2cap_create_iframe_pdu(sk, msg, len, control, 0); - if (IS_ERR(skb)) { - err = PTR_ERR(skb); - goto done; - } - __skb_queue_tail(TX_QUEUE(sk), skb); - - if (sk->sk_send_head == NULL) - sk->sk_send_head = skb; - - } else { - /* Segment SDU into multiples PDUs */ - err = l2cap_sar_segment_sdu(sk, msg, len); - if (err < 0) - goto done; - } - - if (pi->mode == L2CAP_MODE_STREAMING) { - l2cap_streaming_send(sk); - } else { - if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) && - (pi->conn_state & L2CAP_CONN_WAIT_F)) { - err = len; - break; - } - err = l2cap_ertm_send(sk); - } - - if (err >= 0) - err = len; - break; - - default: - BT_DBG("bad state %1.1x", pi->mode); - err = -EBADFD; - } - -done: - release_sock(sk); - return err; -} - static void l2cap_chan_ready(struct sock *sk) { struct sock *parent = bt_sk(sk)->parent; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 93af233..fe4f834 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ 
-681,6 +681,108 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch return err; } +static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) +{ + struct sock *sk = sock->sk; + struct l2cap_pinfo *pi = l2cap_pi(sk); + struct sk_buff *skb; + u16 control; + int err; + + BT_DBG("sock %p, sk %p", sock, sk); + + err = sock_error(sk); + if (err) + return err; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + lock_sock(sk); + + if (sk->sk_state != BT_CONNECTED) { + err = -ENOTCONN; + goto done; + } + + /* Connectionless channel */ + if (sk->sk_type == SOCK_DGRAM) { + skb = l2cap_create_connless_pdu(sk, msg, len); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + } else { + l2cap_do_send(sk, skb); + err = len; + } + goto done; + } + + switch (pi->mode) { + case L2CAP_MODE_BASIC: + /* Check outgoing MTU */ + if (len > pi->omtu) { + err = -EMSGSIZE; + goto done; + } + + /* Create a basic PDU */ + skb = l2cap_create_basic_pdu(sk, msg, len); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + goto done; + } + + l2cap_do_send(sk, skb); + err = len; + break; + + case L2CAP_MODE_ERTM: + case L2CAP_MODE_STREAMING: + /* Entire SDU fits into one PDU */ + if (len <= pi->remote_mps) { + control = L2CAP_SDU_UNSEGMENTED; + skb = l2cap_create_iframe_pdu(sk, msg, len, control, 0); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + goto done; + } + __skb_queue_tail(TX_QUEUE(sk), skb); + + if (sk->sk_send_head == NULL) + sk->sk_send_head = skb; + + } else { + /* Segment SDU into multiples PDUs */ + err = l2cap_sar_segment_sdu(sk, msg, len); + if (err < 0) + goto done; + } + + if (pi->mode == L2CAP_MODE_STREAMING) { + l2cap_streaming_send(sk); + } else { + if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) && + (pi->conn_state & L2CAP_CONN_WAIT_F)) { + err = len; + break; + } + err = l2cap_ertm_send(sk); + } + + if (err >= 0) + err = len; + break; + + default: + BT_DBG("bad state %1.1x", pi->mode); + err = -EBADFD; + } + +done: + 
release_sock(sk); + return err; +} + static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct sock *sk = sock->sk; -- cgit v1.1 From 6ddc0485e1a6ecd450140ea40ffa52786f99183c Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Fri, 4 Feb 2011 03:23:31 -0200 Subject: Bluetooth: move L2CAP sock timers function to l2cap_sock.c Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_core.c | 13 ------------- net/bluetooth/l2cap_sock.c | 13 +++++++++++++ 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index da9b3a4..12abd7c 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -77,19 +77,6 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb); -/* ---- L2CAP timers ---- */ -void l2cap_sock_set_timer(struct sock *sk, long timeout) -{ - BT_DBG("sk %p state %d timeout %ld", sk, sk->sk_state, timeout); - sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout); -} - -void l2cap_sock_clear_timer(struct sock *sk) -{ - BT_DBG("sock %p state %d", sk, sk->sk_state); - sk_stop_timer(sk, &sk->sk_timer); -} - /* ---- L2CAP channels ---- */ static struct sock *__l2cap_get_chan_by_dcid(struct l2cap_chan_list *l, u16 cid) { diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index fe4f834..23bb968 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -30,6 +30,7 @@ #include #include +/* ---- L2CAP timers ---- */ static void l2cap_sock_timeout(unsigned long arg) { struct sock *sk = (struct sock *) arg; @@ -63,6 +64,18 @@ static void l2cap_sock_timeout(unsigned long arg) sock_put(sk); } +void l2cap_sock_set_timer(struct sock *sk, long timeout) +{ + BT_DBG("sk %p state %d timeout %ld", sk, sk->sk_state, timeout); + sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout); +} + +void 
l2cap_sock_clear_timer(struct sock *sk) +{ + BT_DBG("sock %p state %d", sk, sk->sk_state); + sk_stop_timer(sk, &sk->sk_timer); +} + static struct sock *__l2cap_get_sock_by_addr(__le16 psm, bdaddr_t *src) { struct sock *sk; -- cgit v1.1 From 05fc1576dabb1defae3c8c0371fb9d21f7db997a Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Fri, 4 Feb 2011 03:26:01 -0200 Subject: Bluetooth: move l2cap_sock_kill() to l2cap_sock.c Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_core.c | 16 ---------------- net/bluetooth/l2cap_sock.c | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 12abd7c..9d51af3 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -753,22 +753,6 @@ static void l2cap_sock_cleanup_listen(struct sock *parent) sock_set_flag(parent, SOCK_ZAPPED); } -/* Kill socket (only if zapped and orphan) - * Must be called on unlocked socket. - */ -void l2cap_sock_kill(struct sock *sk) -{ - if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket) - return; - - BT_DBG("sk %p state %d", sk, sk->sk_state); - - /* Kill poor orphan */ - bt_sock_unlink(&l2cap_sk_list, sk); - sock_set_flag(sk, SOCK_DEAD); - sock_put(sk); -} - void __l2cap_sock_close(struct sock *sk, int reason) { BT_DBG("sk %p state %d socket %p", sk, sk->sk_state, sk->sk_socket); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 23bb968..4b4e020 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -838,6 +838,22 @@ static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct ms return bt_sock_recvmsg(iocb, sock, msg, len, flags); } +/* Kill socket (only if zapped and orphan) + * Must be called on unlocked socket. 
+ */ +void l2cap_sock_kill(struct sock *sk) +{ + if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket) + return; + + BT_DBG("sk %p state %d", sk, sk->sk_state); + + /* Kill poor orphan */ + bt_sock_unlink(&l2cap_sk_list, sk); + sock_set_flag(sk, SOCK_DEAD); + sock_put(sk); +} + static int l2cap_sock_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; -- cgit v1.1 From 6de0702b5b93da0ef097aa092b4597fbc024ebba Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Fri, 4 Feb 2011 03:35:20 -0200 Subject: Bluetooth: move __l2cap_sock_close() to l2cap_sock.c Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_core.c | 85 ++-------------------------------------------- net/bluetooth/l2cap_sock.c | 78 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 83 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 9d51af3..ba7f9da 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -70,8 +70,6 @@ struct bt_sock_list l2cap_sk_list = { static void l2cap_busy_work(struct work_struct *work); -static void l2cap_sock_close(struct sock *sk); - static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, u8 code, u8 ident, u16 dlen, void *data); @@ -207,7 +205,7 @@ static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct so /* Delete channel. * Must be called on the locked socket. 
*/ -static void l2cap_chan_del(struct sock *sk, int err) +void l2cap_chan_del(struct sock *sk, int err) { struct l2cap_conn *conn = l2cap_pi(sk)->conn; struct sock *parent = bt_sk(sk)->parent; @@ -457,7 +455,7 @@ static inline int l2cap_mode_supported(__u8 mode, __u32 feat_mask) } } -static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk, int err) +void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk, int err) { struct l2cap_disconn_req req; @@ -739,85 +737,6 @@ static struct sock *l2cap_get_sock_by_psm(int state, __le16 psm, bdaddr_t *src) return node ? sk : sk1; } -static void l2cap_sock_cleanup_listen(struct sock *parent) -{ - struct sock *sk; - - BT_DBG("parent %p", parent); - - /* Close not yet accepted channels */ - while ((sk = bt_accept_dequeue(parent, NULL))) - l2cap_sock_close(sk); - - parent->sk_state = BT_CLOSED; - sock_set_flag(parent, SOCK_ZAPPED); -} - -void __l2cap_sock_close(struct sock *sk, int reason) -{ - BT_DBG("sk %p state %d socket %p", sk, sk->sk_state, sk->sk_socket); - - switch (sk->sk_state) { - case BT_LISTEN: - l2cap_sock_cleanup_listen(sk); - break; - - case BT_CONNECTED: - case BT_CONFIG: - if (sk->sk_type == SOCK_SEQPACKET || - sk->sk_type == SOCK_STREAM) { - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - - l2cap_sock_set_timer(sk, sk->sk_sndtimeo); - l2cap_send_disconn_req(conn, sk, reason); - } else - l2cap_chan_del(sk, reason); - break; - - case BT_CONNECT2: - if (sk->sk_type == SOCK_SEQPACKET || - sk->sk_type == SOCK_STREAM) { - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - struct l2cap_conn_rsp rsp; - __u16 result; - - if (bt_sk(sk)->defer_setup) - result = L2CAP_CR_SEC_BLOCK; - else - result = L2CAP_CR_BAD_PSM; - sk->sk_state = BT_DISCONN; - - rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); - rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); - rsp.result = cpu_to_le16(result); - rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); - l2cap_send_cmd(conn, l2cap_pi(sk)->ident, - L2CAP_CONN_RSP, sizeof(rsp), 
&rsp); - } else - l2cap_chan_del(sk, reason); - break; - - case BT_CONNECT: - case BT_DISCONN: - l2cap_chan_del(sk, reason); - break; - - default: - sock_set_flag(sk, SOCK_ZAPPED); - break; - } -} - -/* Must be called on unlocked socket. */ -static void l2cap_sock_close(struct sock *sk) -{ - l2cap_sock_clear_timer(sk); - lock_sock(sk); - __l2cap_sock_close(sk, ECONNRESET); - release_sock(sk); - l2cap_sock_kill(sk); -} - int l2cap_do_connect(struct sock *sk) { bdaddr_t *src = &bt_sk(sk)->src; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 4b4e020..adf4169 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -854,6 +854,84 @@ void l2cap_sock_kill(struct sock *sk) sock_put(sk); } +/* Must be called on unlocked socket. */ +static void l2cap_sock_close(struct sock *sk) +{ + l2cap_sock_clear_timer(sk); + lock_sock(sk); + __l2cap_sock_close(sk, ECONNRESET); + release_sock(sk); + l2cap_sock_kill(sk); +} + +static void l2cap_sock_cleanup_listen(struct sock *parent) +{ + struct sock *sk; + + BT_DBG("parent %p", parent); + + /* Close not yet accepted channels */ + while ((sk = bt_accept_dequeue(parent, NULL))) + l2cap_sock_close(sk); + + parent->sk_state = BT_CLOSED; + sock_set_flag(parent, SOCK_ZAPPED); +} + +void __l2cap_sock_close(struct sock *sk, int reason) +{ + BT_DBG("sk %p state %d socket %p", sk, sk->sk_state, sk->sk_socket); + + switch (sk->sk_state) { + case BT_LISTEN: + l2cap_sock_cleanup_listen(sk); + break; + + case BT_CONNECTED: + case BT_CONFIG: + if (sk->sk_type == SOCK_SEQPACKET || + sk->sk_type == SOCK_STREAM) { + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + + l2cap_sock_set_timer(sk, sk->sk_sndtimeo); + l2cap_send_disconn_req(conn, sk, reason); + } else + l2cap_chan_del(sk, reason); + break; + + case BT_CONNECT2: + if (sk->sk_type == SOCK_SEQPACKET || + sk->sk_type == SOCK_STREAM) { + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + struct l2cap_conn_rsp rsp; + __u16 result; + + if 
(bt_sk(sk)->defer_setup) + result = L2CAP_CR_SEC_BLOCK; + else + result = L2CAP_CR_BAD_PSM; + + rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); + rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); + rsp.result = cpu_to_le16(result); + rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + L2CAP_CONN_RSP, sizeof(rsp), &rsp); + } else + l2cap_chan_del(sk, reason); + break; + + case BT_CONNECT: + case BT_DISCONN: + l2cap_chan_del(sk, reason); + break; + + default: + sock_set_flag(sk, SOCK_ZAPPED); + break; + } +} + static int l2cap_sock_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; -- cgit v1.1 From a0019bca04029d25a8bbbaaaf28487e6ccd7878e Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Mon, 7 Feb 2011 19:15:43 -0200 Subject: Bluetooth: update Bluetooth daemon name in Kconfig help Change hcid to bluetoothd. Signed-off-by: Gustavo F. Padovan --- net/bluetooth/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index ed37168..e45eae6 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -27,9 +27,9 @@ menuconfig BT compile it as module (bluetooth). To use Linux Bluetooth subsystem, you will need several user-space - utilities like hciconfig and hcid. These utilities and updates to - Bluetooth kernel modules are provided in the BlueZ packages. - For more information, see . + utilities like hciconfig and bluetoothd. These utilities and updates + to Bluetooth kernel modules are provided in the BlueZ packages. For + more information, see . config BT_L2CAP tristate "L2CAP protocol support" -- cgit v1.1 From fa9921e46fd52b78070dc67ce0d27ec301a90410 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 2 Feb 2011 06:29:02 +0000 Subject: ipsec: allow to align IPv4 AH on 32 bits The Linux IPv4 AH stack aligns the AH header on a 64 bit boundary (like in IPv6). 
This is not RFC compliant (see RFC4302, Section 3.3.3.2.1), it should be aligned on 32 bits. For most of the authentication algorithms, the ICV size is 96 bits. The AH header alignment on 32 or 64 bits gives the same results. However for SHA-256-128 for instance, the wrong 64 bit alignment results in adding useless padding in IPv4 AH, which is forbidden by the RFC. To avoid breaking backward compatibility, we use a new flag (XFRM_STATE_ALIGN4) do change original behavior. Initial patch from Dang Hongwu and Christophe Gouault . Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv4/ah4.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 86961be..325053d 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -201,7 +201,10 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->ttl = 0; top_iph->check = 0; - ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2; + if (x->props.flags & XFRM_STATE_ALIGN4) + ah->hdrlen = (XFRM_ALIGN4(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2; + else + ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2; ah->reserved = 0; ah->spi = x->id.spi; @@ -299,9 +302,15 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) nexthdr = ah->nexthdr; ah_hlen = (ah->hdrlen + 2) << 2; - if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) && - ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len)) - goto out; + if (x->props.flags & XFRM_STATE_ALIGN4) { + if (ah_hlen != XFRM_ALIGN4(sizeof(*ah) + ahp->icv_full_len) && + ah_hlen != XFRM_ALIGN4(sizeof(*ah) + ahp->icv_trunc_len)) + goto out; + } else { + if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) && + ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len)) + goto out; + } if (!pskb_may_pull(skb, ah_hlen)) goto out; @@ -450,8 +459,12 @@ static int ah_init_state(struct xfrm_state *x) 
BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN); - x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + - ahp->icv_trunc_len); + if (x->props.flags & XFRM_STATE_ALIGN4) + x->props.header_len = XFRM_ALIGN4(sizeof(struct ip_auth_hdr) + + ahp->icv_trunc_len); + else + x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + + ahp->icv_trunc_len); if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); x->data = ahp; -- cgit v1.1 From 8d13a2a9fb3e5e3f68e9d3ec0de3c8fcfa56a224 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Feb 2011 16:17:55 -0800 Subject: net: Kill NETEVENT_PMTU_UPDATE. Nobody actually does anything in response to the event, so just kill it off. Signed-off-by: David S. Miller --- net/ipv4/route.c | 1 - net/ipv6/route.c | 1 - 2 files changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2e225da..0455af8 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1762,7 +1762,6 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) } dst_metric_set(dst, RTAX_MTU, mtu); dst_set_expires(dst, ip_rt_mtu_expires); - call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); } } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0a63d44..12ec83d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -965,7 +965,6 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) dst_metric_set(dst, RTAX_FEATURES, features); } dst_metric_set(dst, RTAX_MTU, mtu); - call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); } } -- cgit v1.1 From 4d51e149a6231ea9cc210795cbc358a7c9a8e016 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Mon, 7 Feb 2011 13:44:34 -0800 Subject: mac80211: Properly set work-item channel-type. Some were indirectly set to NO_HT (zero), but I think it's better to explicitly set it in case the enum ever changes. In cfg.c, it seems the channel-type was just ignored (and thus always set to NO_HT). Signed-off-by: Ben Greear Signed-off-by: John W. 
Linville --- net/mac80211/cfg.c | 1 + net/mac80211/mlme.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 845c76d..f7a1f08 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1862,6 +1862,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, wk->type = IEEE80211_WORK_OFFCHANNEL_TX; wk->chan = chan; + wk->chan_type = channel_type; wk->sdata = sdata; wk->done = ieee80211_offchan_tx_done; wk->offchan_tx.frame = skb; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f77adf1..d89e878 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2294,6 +2294,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, else wk->type = IEEE80211_WORK_DIRECT_PROBE; wk->chan = req->bss->channel; + wk->chan_type = NL80211_CHAN_NO_HT; wk->sdata = sdata; wk->done = ieee80211_probe_auth_done; @@ -2443,6 +2444,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, memcpy(wk->assoc.prev_bssid, req->prev_bssid, ETH_ALEN); wk->chan = req->bss->channel; + wk->chan_type = NL80211_CHAN_NO_HT; wk->sdata = sdata; wk->done = ieee80211_assoc_done; if (!bss->dtim_period && -- cgit v1.1 From 4f2e9d91f84ce39698517203974ffc2bcc32a21d Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Mon, 7 Feb 2011 13:44:35 -0800 Subject: mac80211: Allow scanning on existing channel-type. Previous code set the channel type to NO_HT, but it appears that NO_HT packets can be sent on any channel type, so we do not need to change the channel type as long as the channel is correct. Signed-off-by: Ben Greear Signed-off-by: John W. 
Linville --- net/mac80211/main.c | 16 ++++++++++++++-- net/mac80211/scan.c | 6 ++---- 2 files changed, 16 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index c155c0b..86562ce 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -112,7 +112,13 @@ bool ieee80211_cfg_on_oper_channel(struct ieee80211_local *local) /* This logic needs to match logic in ieee80211_hw_config */ if (local->scan_channel) { chan = local->scan_channel; - channel_type = NL80211_CHAN_NO_HT; + /* If scanning on oper channel, use whatever channel-type + * is currently in use. + */ + if (chan == local->oper_channel) + channel_type = local->_oper_channel_type; + else + channel_type = NL80211_CHAN_NO_HT; } else if (local->tmp_channel) { chan = scan_chan = local->tmp_channel; channel_type = local->tmp_channel_type; @@ -151,7 +157,13 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; if (scan_chan) { chan = scan_chan; - channel_type = NL80211_CHAN_NO_HT; + /* If scanning on oper channel, use whatever channel-type + * is currently in use. + */ + if (chan == local->oper_channel) + channel_type = local->_oper_channel_type; + else + channel_type = NL80211_CHAN_NO_HT; } else if (local->tmp_channel) { chan = scan_chan = local->tmp_channel; channel_type = local->tmp_channel_type; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 0ea6ada..0e70d7a 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -517,8 +517,7 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, if (ieee80211_cfg_on_oper_channel(local)) { /* We're currently on operating channel. */ - if ((next_chan == local->oper_channel) && - (local->_oper_channel_type == NL80211_CHAN_NO_HT)) + if (next_chan == local->oper_channel) /* We don't need to move off of operating channel. 
*/ local->next_scan_state = SCAN_SET_CHANNEL; else @@ -620,8 +619,7 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local, local->scan_channel = chan; /* Only call hw-config if we really need to change channels. */ - if ((chan != local->hw.conf.channel) || - (local->hw.conf.channel_type != NL80211_CHAN_NO_HT)) + if (chan != local->hw.conf.channel) if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL)) skip = 1; -- cgit v1.1 From da2fd1f0f7b78f21f6378f726d1f6de9d573b2d4 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Mon, 7 Feb 2011 13:44:36 -0800 Subject: mac80211: Allow work items to use existing channel type. Narrow channel types can function within larger channel types. So, use existing channel type for work items when possible. This decreases hardware channel changes significantly when using non NO_HT channel types on the operating channel. Signed-off-by: Ben Greear Signed-off-by: John W. Linville --- net/mac80211/work.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/work.c b/net/mac80211/work.c index 6bf787a..64f2b28 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -874,6 +874,44 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local, kfree_skb(skb); } +static bool ieee80211_work_ct_coexists(enum nl80211_channel_type wk_ct, + enum nl80211_channel_type oper_ct) +{ + switch (wk_ct) { + case NL80211_CHAN_NO_HT: + return true; + case NL80211_CHAN_HT20: + if (oper_ct != NL80211_CHAN_NO_HT) + return true; + return false; + case NL80211_CHAN_HT40MINUS: + case NL80211_CHAN_HT40PLUS: + return (wk_ct == oper_ct); + } + WARN_ON(1); /* shouldn't get here */ + return false; +} + +static enum nl80211_channel_type +ieee80211_calc_ct(enum nl80211_channel_type wk_ct, + enum nl80211_channel_type oper_ct) +{ + switch (wk_ct) { + case NL80211_CHAN_NO_HT: + return oper_ct; + case NL80211_CHAN_HT20: + if (oper_ct 
!= NL80211_CHAN_NO_HT) + return oper_ct; + return wk_ct; + case NL80211_CHAN_HT40MINUS: + case NL80211_CHAN_HT40PLUS: + return wk_ct; + } + WARN_ON(1); /* shouldn't get here */ + return wk_ct; +} + + static void ieee80211_work_timer(unsigned long data) { struct ieee80211_local *local = (void *) data; @@ -927,14 +965,22 @@ static void ieee80211_work_work(struct work_struct *work) bool on_oper_chan; bool tmp_chan_changed = false; bool on_oper_chan2; + enum nl80211_channel_type wk_ct; on_oper_chan = ieee80211_cfg_on_oper_channel(local); + + /* Work with existing channel type if possible. */ + wk_ct = wk->chan_type; + if (wk->chan == local->hw.conf.channel) + wk_ct = ieee80211_calc_ct(wk->chan_type, + local->hw.conf.channel_type); + if (local->tmp_channel) if ((local->tmp_channel != wk->chan) || - (local->tmp_channel_type != wk->chan_type)) + (local->tmp_channel_type != wk_ct)) tmp_chan_changed = true; local->tmp_channel = wk->chan; - local->tmp_channel_type = wk->chan_type; + local->tmp_channel_type = wk_ct; /* * Leave the station vifs in awake mode if they * happen to be on the same channel as @@ -1031,7 +1077,8 @@ static void ieee80211_work_work(struct work_struct *work) continue; if (wk->chan != local->tmp_channel) continue; - if (wk->chan_type != local->tmp_channel_type) + if (ieee80211_work_ct_coexists(wk->chan_type, + local->tmp_channel_type)) continue; remain_off_channel = true; } -- cgit v1.1 From 59bdf3b0fe7a183f18ce94696259c4c76abb4568 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Mon, 7 Feb 2011 13:44:38 -0800 Subject: mac80211: Ensure power-level set properly for scanning. My previous patch to optimize scanning on operating channel accidentally removed the code that would ensure power was set to maximum for scanning. This patch re-adds that functionality. Signed-off-by: Ben Greear Signed-off-by: John W. 
Linville --- net/mac80211/main.c | 3 ++- net/mac80211/scan.c | 9 ++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 86562ce..e7eb2cf 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -199,7 +199,8 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) changed |= IEEE80211_CONF_CHANGE_SMPS; } - if (scan_chan) + if ((local->scanning & SCAN_SW_SCANNING) || + (local->scanning & SCAN_HW_SCANNING)) power = chan->max_power; else power = local->power_constr_level ? diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 0e70d7a..8429545 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -307,11 +307,15 @@ static void __ieee80211_scan_completed_finish(struct ieee80211_hw *hw, mutex_lock(&local->mtx); on_oper_chan = ieee80211_cfg_on_oper_channel(local); + WARN_ON(local->scanning & (SCAN_SW_SCANNING | SCAN_HW_SCANNING)); + if (was_hw_scan || !on_oper_chan) { if (WARN_ON(local->scan_channel)) local->scan_channel = NULL; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); - } + } else + /* Set power back to normal operating levels. */ + ieee80211_hw_config(local, 0); if (!was_hw_scan) { bool on_oper_chan2; @@ -377,6 +381,9 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) ieee80211_configure_filter(local); + /* We need to set power level at maximum rate for scanning. 
*/ + ieee80211_hw_config(local, 0); + ieee80211_queue_delayed_work(&local->hw, &local->scan_work, IEEE80211_CHANNEL_TIME); -- cgit v1.1 From b6644cb706610874104dbf3359e3b67aa59cbc27 Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Wed, 9 Feb 2011 19:16:15 -0800 Subject: net: rename group sysfs entry to netdev_group commit a512b92 adds sysfs entry for net device group, but before this commit, tun also uses group sysfs, so after this commit checkin, kernel warns like this: sysfs: cannot create duplicate filename '/devices/virtual/net/vnet0/group' Since tun has used this for years, rename sysfs under tun might break existing userspace, so rename group sysfs entry for net device group is a better choice. Signed-off-by: Xiaotian Feng Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 2e4a393..5ceb257 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -330,7 +330,7 @@ static struct device_attribute net_class_attributes[] = { __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags), __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, store_tx_queue_len), - __ATTR(group, S_IRUGO | S_IWUSR, show_group, store_group), + __ATTR(netdev_group, S_IRUGO | S_IWUSR, show_group, store_group), {} }; -- cgit v1.1 From c16e19c11730199c1df686b160c9c972ad28baf8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Feb 2011 10:13:07 +0100 Subject: netfilter: ipset: add dependency on CONFIG_NETFILTER_NETLINK When SYSCTL and PROC_FS and NETFILTER_NETLINK are not enabled: net/built-in.o: In function `try_to_load_type': ip_set_core.c:(.text+0x3ab49): undefined reference to `nfnl_unlock' ip_set_core.c:(.text+0x3ab4e): undefined reference to `nfnl_lock' ... 
Reported-by: Randy Dunlap Acked-by: Randy Dunlap Signed-off-by: Patrick McHardy --- net/netfilter/ipset/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index 3b970d3..2c5b348 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -1,6 +1,7 @@ menuconfig IP_SET tristate "IP set support" depends on INET && NETFILTER + depends on NETFILTER_NETLINK help This option adds IP set support to the kernel. In order to define and use the sets, you need the userspace utility -- cgit v1.1 From 7a71ed899e77cc822abb863e24a422dcf7e9fa33 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 9 Feb 2011 14:30:26 -0800 Subject: inetpeer: Abstract address representation further. Future changes will add caching information, and some of these new elements will be addresses. Since the family is implicit via the ->daddr.family member, replicating the family in ever address we store is entirely redundant. Signed-off-by: David S. Miller --- net/ipv4/inetpeer.c | 6 +++--- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 709fbb4..4346c38 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -167,9 +167,9 @@ static int addr_compare(const struct inetpeer_addr *a, int i, n = (a->family == AF_INET ? 
1 : 4); for (i = 0; i < n; i++) { - if (a->a6[i] == b->a6[i]) + if (a->addr.a6[i] == b->addr.a6[i]) continue; - if (a->a6[i] < b->a6[i]) + if (a->addr.a6[i] < b->addr.a6[i]) return -1; return 1; } @@ -510,7 +510,7 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) p->daddr = *daddr; atomic_set(&p->refcnt, 1); atomic_set(&p->rid, 0); - atomic_set(&p->ip_id_count, secure_ip_id(daddr->a4)); + atomic_set(&p->ip_id_count, secure_ip_id(daddr->addr.a4)); p->tcp_ts_stamp = 0; p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 02f583b..e2b9be2 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1341,7 +1341,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_death_row.sysctl_tw_recycle && (dst = inet_csk_route_req(sk, req)) != NULL && (peer = rt_get_peer((struct rtable *)dst)) != NULL && - peer->daddr.a4 == saddr) { + peer->daddr.addr.a4 == saddr) { inet_peer_refcheck(peer); if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && (s32)(peer->tcp_ts - req->ts_recent) > diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 20aa95e..d6954e3 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1323,7 +1323,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_death_row.sysctl_tw_recycle && (dst = inet6_csk_route_req(sk, req)) != NULL && (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL && - ipv6_addr_equal((struct in6_addr *)peer->daddr.a6, + ipv6_addr_equal((struct in6_addr *)peer->daddr.addr.a6, &treq->rmt_addr)) { inet_peer_refcheck(peer); if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && -- cgit v1.1 From ddd4aa424b866a08ceba7ddf38e61542c91b93a0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 9 Feb 2011 15:36:47 -0800 Subject: inetpeer: Add redirect and PMTU discovery cached info. 
Validity of the cached PMTU information is indicated by its expiration value being non-zero, just as per dst->expires. The scheme we will use is that we will remember the pre-ICMP value held in the metrics or route entry, and then at expiration time we will restore that value. In this way PMTU expiration does not kill off the cached route as is done currently. Redirect information is permanent, or at least until another redirect is received. Signed-off-by: David S. Miller --- net/ipv4/inetpeer.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 4346c38..48f8d45 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -515,6 +515,8 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; p->rate_last = 0; + p->pmtu_expires = 0; + memset(&p->redirect_learned, 0, sizeof(p->redirect_learned)); INIT_LIST_HEAD(&p->unused); -- cgit v1.1 From 6431cbc25fa21635ee04eb0516ba6c51389fbfac Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 7 Feb 2011 20:38:06 -0800 Subject: inet: Create a mechanism for upward inetpeer propagation into routes. If we didn't have a routing cache, we would not be able to properly propagate certain kinds of dynamic path attributes, for example PMTU information and redirects. The reason is that if we didn't have a routing cache, then there would be no way to lookup all of the active cached routes hanging off of sockets, tunnels, IPSEC bundles, etc. Consider the case where we created a cached route, but no inetpeer entry existed and also we were not asked to pre-COW the route metrics and therefore did not force the creation of a new inetpeer entry. If we later get a PMTU message, or a redirect, and store this information in a new inetpeer entry, there is no way to teach that cached route about the newly existing inetpeer entry. The facilities implemented here handle this problem.
First we create a generation ID. When we create a cached route of any kind, we remember the generation ID at the time of attachment. Any time we force-create an inetpeer entry in response to new path information, we bump that generation ID. The dst_ops->check() callback is where the knowledge of this event is propagated. If the global generation ID does not equal the one stored in the cached route, and the cached route has not attached to an inetpeer yet, we look it up and attach if one is found. Now that we've updated the cached route's information, we update the route's generation ID too. This clears the way for implementing PMTU and redirects directly in the inetpeer cache. There is absolutely no need to consult cached route information in order to maintain this information. At this point nothing bumps the inetpeer genids, that comes in the later changes which handle PMTUs and redirects using inetpeers. Signed-off-by: David S. Miller --- net/ipv4/route.c | 19 ++++++++++++++++++- net/ipv6/route.c | 18 ++++++++++++++++-- 2 files changed, 34 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 0455af8..0979e03 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1308,6 +1308,13 @@ skip_hashing: return 0; } +static atomic_t __rt_peer_genid = ATOMIC_INIT(0); + +static u32 rt_peer_genid(void) +{ + return atomic_read(&__rt_peer_genid); +} + void rt_bind_peer(struct rtable *rt, int create) { struct inet_peer *peer; @@ -1316,6 +1323,8 @@ void rt_bind_peer(struct rtable *rt, int create) if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) inet_putpeer(peer); + else + rt->rt_peer_genid = rt_peer_genid(); } /* @@ -1767,8 +1776,16 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) { - if (rt_is_expired((struct rtable *)dst)) + struct rtable *rt = (struct rtable *) dst; + + if (rt_is_expired(rt)) return NULL; + if (rt->rt_peer_genid != 
rt_peer_genid()) { + if (!rt->peer) + rt_bind_peer(rt, 0); + + rt->rt_peer_genid = rt_peer_genid(); + } return dst; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 12ec83d..ad8556e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -240,6 +240,13 @@ static void ip6_dst_destroy(struct dst_entry *dst) } } +static atomic_t __rt6_peer_genid = ATOMIC_INIT(0); + +static u32 rt6_peer_genid(void) +{ + return atomic_read(&__rt6_peer_genid); +} + void rt6_bind_peer(struct rt6_info *rt, int create) { struct inet_peer *peer; @@ -247,6 +254,8 @@ void rt6_bind_peer(struct rt6_info *rt, int create) peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create); if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL) inet_putpeer(peer); + else + rt->rt6i_peer_genid = rt6_peer_genid(); } static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -912,9 +921,14 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) rt = (struct rt6_info *) dst; - if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) + if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) { + if (rt->rt6i_peer_genid != rt6_peer_genid()) { + if (!rt->rt6i_peer) + rt6_bind_peer(rt, 0); + rt->rt6i_peer_genid = rt6_peer_genid(); + } return dst; - + } return NULL; } -- cgit v1.1 From c2f7f0e7b3ce55eee32892d6aa5cd88a7512ea25 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 10 Feb 2011 14:33:56 +0000 Subject: batman-adv: Use successive sequence numbers for fragments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The two fragments of an unicast packet must have successive sequence numbers to allow the receiver side to detect matching fragments and merge them again. The current implementation doesn't provide that property because a sequence of two atomic_inc_return may be interleaved with another sequence which also changes the variable. 
The access to the fragment sequence number pool has either to be protected by correct locking or it has to reserve two sequence numbers in a single fetch. The latter one can easily be done by increasing the value of the last used sequence number by 2 in a single step. The generated window of two currently unused sequence numbers can now be scattered across the two fragments. Reported-by: Linus Lüssing Signed-off-by: Sven Eckelmann --- net/batman-adv/unicast.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index cbf022c..9b2a222 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -226,6 +226,7 @@ int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, int ucf_hdr_len = sizeof(struct unicast_frag_packet); int data_len = skb->len - uc_hdr_len; int large_tail = 0; + uint16_t seqno; if (!bat_priv->primary_if) goto dropped; @@ -261,10 +262,9 @@ int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, frag1->flags = UNI_FRAG_HEAD | large_tail; frag2->flags = large_tail; - frag1->seqno = htons((uint16_t)atomic_inc_return( - &batman_if->frag_seqno)); - frag2->seqno = htons((uint16_t)atomic_inc_return( - &batman_if->frag_seqno)); + seqno = atomic_add_return(2, &batman_if->frag_seqno); + frag1->seqno = htons(seqno - 1); + frag2->seqno = htons(seqno); send_skb_packet(skb, batman_if, dstaddr); send_skb_packet(frag_skb, batman_if, dstaddr); -- cgit v1.1 From 44bd4de9c2270b22c3c898310102bc6be9ed2978 Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Fri, 11 Feb 2011 18:00:07 +0100 Subject: netfilter: xt_connlimit: connlimit-above early loop termination The patch below introduces an early termination of the loop that is counting matches. It terminates once the counter has exceeded the threshold provided by the user. There's no point in continuing the loop afterwards and looking at other entries. 
It plays together with the following code further below: return (connections > info->limit) ^ info->inverse; where connections is the result of the counted connection, which in turn is the matches variable in the loop. So once -> matches = info->limit + 1 alias -> matches > info->limit alias -> matches > threshold we can terminate the loop. Signed-off-by: Stefan Berger Signed-off-by: Patrick McHardy --- net/netfilter/xt_connlimit.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index e029c48..82ce7c5 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -97,7 +97,8 @@ static int count_them(struct net *net, const struct nf_conntrack_tuple *tuple, const union nf_inet_addr *addr, const union nf_inet_addr *mask, - u_int8_t family) + u_int8_t family, + unsigned int threshold) { const struct nf_conntrack_tuple_hash *found; struct xt_connlimit_conn *conn; @@ -151,9 +152,14 @@ static int count_them(struct net *net, continue; } - if (same_source_net(addr, mask, &conn->tuple.src.u3, family)) + if (same_source_net(addr, mask, &conn->tuple.src.u3, family)) { /* same source network -> be counted! */ ++matches; + if (matches > threshold) { + nf_ct_put(found_ct); + break; + } + } nf_ct_put(found_ct); } @@ -207,7 +213,8 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) spin_lock_bh(&info->data->lock); connections = count_them(net, info->data, tuple_ptr, &addr, - &info->mask, par->family); + &info->mask, par->family, + info->limit); spin_unlock_bh(&info->data->lock); if (connections < 0) -- cgit v1.1 From e3fb592b15602196d38b225dc78aab8d631a5f89 Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Thu, 10 Feb 2011 20:59:42 +0300 Subject: Bluetooth: l2cap: fix 1 byte infoleak to userspace Structure l2cap_options has one padding byte between max_tx and txwin_size fields. 
This byte in "opts" is copied to userspace uninitialized. Signed-off-by: Vasiliy Kulikov Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_sock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index adf4169..21f5385 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -392,6 +392,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __us switch (optname) { case L2CAP_OPTIONS: + memset(&opts, 0, sizeof(opts)); opts.imtu = l2cap_pi(sk)->imtu; opts.omtu = l2cap_pi(sk)->omtu; opts.flush_to = l2cap_pi(sk)->flush_to; -- cgit v1.1 From 14062064167ecdda4a17ec9190740c189223550a Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Tue, 8 Feb 2011 12:43:54 +0000 Subject: batman-adv: Split combined variable declarations Multiple variable declarations in a single statement over multiple lines can be split into multiple variable declarations without changing the actual behavior.
Signed-off-by: Marek Lindner Signed-off-by: Sven Eckelmann --- net/batman-adv/unicast.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 9b2a222..6c92eef 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -39,8 +39,8 @@ static struct sk_buff *frag_merge_packet(struct list_head *head, (struct unicast_frag_packet *)skb->data; struct sk_buff *tmp_skb; struct unicast_packet *unicast_packet; - int hdr_len = sizeof(struct unicast_packet), - uni_diff = sizeof(struct unicast_frag_packet) - hdr_len; + int hdr_len = sizeof(struct unicast_packet); + int uni_diff = sizeof(struct unicast_frag_packet) - hdr_len; /* set skb to the first part and tmp_skb to the second part */ if (up->flags & UNI_FRAG_HEAD) { -- cgit v1.1 From ee1e884194eb19574898ce6d5eaef5e8afdec7f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Sun, 6 Feb 2011 23:08:37 +0000 Subject: batman-adv: Remove duplicate types.h inclusions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit types.h is included by main.h, which is included at the beginning of any other c-file anyway. Therefore this commit removes those duplicate inclusions. 
Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann --- net/batman-adv/icmp_socket.c | 1 - net/batman-adv/icmp_socket.h | 2 -- net/batman-adv/main.c | 1 - net/batman-adv/routing.c | 1 - net/batman-adv/routing.h | 2 -- net/batman-adv/send.c | 1 - net/batman-adv/send.h | 2 -- net/batman-adv/soft-interface.c | 1 - net/batman-adv/translation-table.c | 1 - net/batman-adv/translation-table.h | 2 -- 10 files changed, 14 deletions(-) (limited to 'net') diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 5e86d6f..319a7cc 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -24,7 +24,6 @@ #include #include "icmp_socket.h" #include "send.h" -#include "types.h" #include "hash.h" #include "originator.h" #include "hard-interface.h" diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h index 08b1859..462b190 100644 --- a/net/batman-adv/icmp_socket.h +++ b/net/batman-adv/icmp_socket.h @@ -22,8 +22,6 @@ #ifndef _NET_BATMAN_ADV_ICMP_SOCKET_H_ #define _NET_BATMAN_ADV_ICMP_SOCKET_H_ -#include "types.h" - #define ICMP_SOCKET "socket" void bat_socket_init(void); diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index dc9248d..06d956c 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -30,7 +30,6 @@ #include "translation-table.h" #include "hard-interface.h" #include "gateway_client.h" -#include "types.h" #include "vis.h" #include "hash.h" diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 028f739..8274140 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -28,7 +28,6 @@ #include "icmp_socket.h" #include "translation-table.h" #include "originator.h" -#include "types.h" #include "ring_buffer.h" #include "vis.h" #include "aggregation.h" diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index ceeca6f..a09d16f 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -22,8 +22,6 @@ #ifndef _NET_BATMAN_ADV_ROUTING_H_ 
#define _NET_BATMAN_ADV_ROUTING_H_ -#include "types.h" - void slide_own_bcast_window(struct batman_if *batman_if); void receive_bat_packet(struct ethhdr *ethhdr, struct batman_packet *batman_packet, diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 7cc620e..8314276 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -25,7 +25,6 @@ #include "translation-table.h" #include "soft-interface.h" #include "hard-interface.h" -#include "types.h" #include "vis.h" #include "aggregation.h" #include "gateway_common.h" diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index bc53ade..b68c272 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -22,8 +22,6 @@ #ifndef _NET_BATMAN_ADV_SEND_H_ #define _NET_BATMAN_ADV_SEND_H_ -#include "types.h" - int send_skb_packet(struct sk_buff *skb, struct batman_if *batman_if, uint8_t *dst_addr); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 145e0f7..bd088f8 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -26,7 +26,6 @@ #include "send.h" #include "bat_debugfs.h" #include "translation-table.h" -#include "types.h" #include "hash.h" #include "gateway_common.h" #include "gateway_client.h" diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index f6917dd..7fb6726 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -22,7 +22,6 @@ #include "main.h" #include "translation-table.h" #include "soft-interface.h" -#include "types.h" #include "hash.h" #include "originator.h" diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index a4f3a37..f19931c 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -22,8 +22,6 @@ #ifndef _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ #define _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ -#include "types.h" - int hna_local_init(struct bat_priv *bat_priv); void 
hna_local_add(struct net_device *soft_iface, uint8_t *addr); void hna_local_remove(struct bat_priv *bat_priv, -- cgit v1.1 From 3878f1f075470990d9c2418b53f31694e774f743 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Mon, 7 Feb 2011 00:14:40 +0000 Subject: batman-adv: Disallow originator addressing within mesh layer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For a host in the mesh network, the batman layer should be transparent. However, we had one exception, data packets within the mesh network which have the same destination as a originator are being routed to that node, although there is no host that node's bat0 interface and therefore gets dropped anyway. This commit removes this exception. Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann --- net/batman-adv/unicast.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'net') diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 6c92eef..1b5e761 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -281,7 +281,7 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv) { struct ethhdr *ethhdr = (struct ethhdr *)skb->data; struct unicast_packet *unicast_packet; - struct orig_node *orig_node; + struct orig_node *orig_node = NULL; struct batman_if *batman_if; struct neigh_node *router; int data_len = skb->len; @@ -292,11 +292,6 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv) /* get routing information */ if (is_multicast_ether_addr(ethhdr->h_dest)) orig_node = (struct orig_node *)gw_get_selected(bat_priv); - else - orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, - compare_orig, - choose_orig, - ethhdr->h_dest)); /* check for hna host */ if (!orig_node) -- cgit v1.1 From 57f89bfa21403d902ee176ef988136b75d9ab30b Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Fri, 11 Feb 2011 09:35:18 +0000 Subject: network: Allow af_packet to 
transmit +4 bytes for VLAN packets. This allows user-space to send a '1500' MTU VLAN packet on a 1500 MTU ethernet frame. The extra 4 bytes of a VLAN header is not usually charged against the MTU when other parts of the network stack is transmitting vlans... Signed-off-by: Ben Greear Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c60649e..5efef5b 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -465,7 +465,7 @@ retry: */ err = -EMSGSIZE; - if (len > dev->mtu + dev->hard_header_len) + if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN) goto out_unlock; if (!skb) { @@ -496,6 +496,19 @@ retry: goto retry; } + if (len > (dev->mtu + dev->hard_header_len)) { + /* Earlier code assumed this would be a VLAN pkt, + * double-check this now that we have the actual + * packet in hand. + */ + struct ethhdr *ehdr; + skb_reset_mac_header(skb); + ehdr = eth_hdr(skb); + if (ehdr->h_proto != htons(ETH_P_8021Q)) { + err = -EMSGSIZE; + goto out_unlock; + } + } skb->protocol = proto; skb->dev = dev; @@ -1199,7 +1212,7 @@ static int packet_snd(struct socket *sock, } err = -EMSGSIZE; - if (!gso_type && (len > dev->mtu+reserve)) + if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN)) goto out_unlock; err = -ENOBUFS; @@ -1224,6 +1237,20 @@ static int packet_snd(struct socket *sock, if (err < 0) goto out_free; + if (!gso_type && (len > dev->mtu + reserve)) { + /* Earlier code assumed this would be a VLAN pkt, + * double-check this now that we have the actual + * packet in hand. 
+ */ + struct ethhdr *ehdr; + skb_reset_mac_header(skb); + ehdr = eth_hdr(skb); + if (ehdr->h_proto != htons(ETH_P_8021Q)) { + err = -EMSGSIZE; + goto out_free; + } + } + skb->protocol = proto; skb->dev = dev; skb->priority = sk->sk_priority; -- cgit v1.1 From d59cfde2fb960b5970ccb5a38cea25d38b37a8e8 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 12 Feb 2011 00:46:06 +0000 Subject: net: remove the unnecessary dance around skb_bond_should_drop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need to check (master) twice and to drive in and out the header file. Signed-off-by: Jiri Pirko Reviewed-by: Nicolas de Pesloüan Signed-off-by: David S. Miller --- net/core/dev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 6392ea0..d874fd1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3105,7 +3105,8 @@ static inline void skb_bond_set_mac_by_master(struct sk_buff *skb, * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and * ARP on active-backup slaves with arp_validate enabled. 
*/ -int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master) +static int __skb_bond_should_drop(struct sk_buff *skb, + struct net_device *master) { struct net_device *dev = skb->dev; @@ -3139,7 +3140,6 @@ int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master) } return 0; } -EXPORT_SYMBOL(__skb_bond_should_drop); static int __netif_receive_skb(struct sk_buff *skb) { @@ -3177,7 +3177,7 @@ static int __netif_receive_skb(struct sk_buff *skb) if (skb->deliver_no_wcard) null_or_orig = orig_dev; else if (master) { - if (skb_bond_should_drop(skb, master)) { + if (__skb_bond_should_drop(skb, master)) { skb->deliver_no_wcard = 1; null_or_orig = orig_dev; /* deliver only exact match */ } else -- cgit v1.1 From 1765a575334f1a232c1478accdee5c7d19f4b3e3 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 12 Feb 2011 06:48:36 +0000 Subject: net: make dev->master general dev->master is now tightly connected to bonding driver. This patch makes this pointer more general and ready to be used by others. - netdev_set_master() - bond specifics moved to new function netdev_set_bond_master() - introduced netif_is_bond_slave() to check if device is a bonding slave Signed-off-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/core/dev.c | 49 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index d874fd1..a413276 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3146,7 +3146,6 @@ static int __netif_receive_skb(struct sk_buff *skb) struct packet_type *ptype, *pt_prev; rx_handler_func_t *rx_handler; struct net_device *orig_dev; - struct net_device *master; struct net_device *null_or_orig; struct net_device *orig_or_bond; int ret = NET_RX_DROP; @@ -3173,15 +3172,19 @@ static int __netif_receive_skb(struct sk_buff *skb) */ null_or_orig = NULL; orig_dev = skb->dev; - master = ACCESS_ONCE(orig_dev->master); if (skb->deliver_no_wcard) null_or_orig = orig_dev; - else if (master) { - if (__skb_bond_should_drop(skb, master)) { - skb->deliver_no_wcard = 1; - null_or_orig = orig_dev; /* deliver only exact match */ - } else - skb->dev = master; + else if (netif_is_bond_slave(orig_dev)) { + struct net_device *bond_master = ACCESS_ONCE(orig_dev->master); + + if (likely(bond_master)) { + if (__skb_bond_should_drop(skb, bond_master)) { + skb->deliver_no_wcard = 1; + /* deliver only exact match */ + null_or_orig = orig_dev; + } else + skb->dev = bond_master; + } } __this_cpu_inc(softnet_data.processed); @@ -4346,15 +4349,14 @@ static int __init dev_proc_init(void) /** - * netdev_set_master - set up master/slave pair + * netdev_set_master - set up master pointer * @slave: slave device * @master: new master device * * Changes the master device of the slave. Pass %NULL to break the * bonding. The caller must hold the RTNL semaphore. On a failure * a negative errno code is returned. On success the reference counts - * are adjusted, %RTM_NEWLINK is sent to the routing socket and the - * function returns zero. + * are adjusted and the function returns zero. 
*/ int netdev_set_master(struct net_device *slave, struct net_device *master) { @@ -4374,6 +4376,29 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) synchronize_net(); dev_put(old); } + return 0; +} +EXPORT_SYMBOL(netdev_set_master); + +/** + * netdev_set_bond_master - set up bonding master/slave pair + * @slave: slave device + * @master: new master device + * + * Changes the master device of the slave. Pass %NULL to break the + * bonding. The caller must hold the RTNL semaphore. On a failure + * a negative errno code is returned. On success %RTM_NEWLINK is sent + * to the routing socket and the function returns zero. + */ +int netdev_set_bond_master(struct net_device *slave, struct net_device *master) +{ + int err; + + ASSERT_RTNL(); + + err = netdev_set_master(slave, master); + if (err) + return err; if (master) slave->flags |= IFF_SLAVE; else @@ -4382,7 +4407,7 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); return 0; } -EXPORT_SYMBOL(netdev_set_master); +EXPORT_SYMBOL(netdev_set_bond_master); static void dev_change_rx_flags(struct net_device *dev, int flags) { -- cgit v1.1 From fbaec0ea54f7d9131891ff98744e82c073ce03b1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sun, 13 Feb 2011 10:15:37 +0000 Subject: rtnetlink: implement setting of master device This patch allows userspace to enslave/release slave devices via netlink interface using IFLA_MASTER. This introduces generic way to add/remove underling devices. Signed-off-by: Jiri Pirko Acked-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index da0fe45..49f7ea5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1036,6 +1036,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) }, [IFLA_MTU] = { .type = NLA_U32 }, [IFLA_LINK] = { .type = NLA_U32 }, + [IFLA_MASTER] = { .type = NLA_U32 }, [IFLA_TXQLEN] = { .type = NLA_U32 }, [IFLA_WEIGHT] = { .type = NLA_U32 }, [IFLA_OPERSTATE] = { .type = NLA_U8 }, @@ -1178,6 +1179,41 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) return err; } +static int do_set_master(struct net_device *dev, int ifindex) +{ + struct net_device *master_dev; + const struct net_device_ops *ops; + int err; + + if (dev->master) { + if (dev->master->ifindex == ifindex) + return 0; + ops = dev->master->netdev_ops; + if (ops->ndo_del_slave) { + err = ops->ndo_del_slave(dev->master, dev); + if (err) + return err; + } else { + return -EOPNOTSUPP; + } + } + + if (ifindex) { + master_dev = __dev_get_by_index(dev_net(dev), ifindex); + if (!master_dev) + return -EINVAL; + ops = master_dev->netdev_ops; + if (ops->ndo_add_slave) { + err = ops->ndo_add_slave(master_dev, dev); + if (err) + return err; + } else { + return -EOPNOTSUPP; + } + } + return 0; +} + static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct nlattr **tb, char *ifname, int modified) { @@ -1301,6 +1337,13 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, goto errout; } + if (tb[IFLA_MASTER]) { + err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER])); + if (err) + goto errout; + modified = 1; + } + if (tb[IFLA_TXQLEN]) dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); -- cgit v1.1 From afc6151a78a43bdca5f64a8bd3e3c13837580c54 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sun, 13 Feb 2011 09:33:42 +0000 
Subject: bridge: implement [add/del]_slave ops add possibility to addif/delif via rtnetlink Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/bridge/br_device.c | 17 +++++++++++++++++ net/bridge/br_if.c | 11 ++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 5564435..1461b19 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -297,6 +297,21 @@ void br_netpoll_disable(struct net_bridge_port *p) #endif +static int br_add_slave(struct net_device *dev, struct net_device *slave_dev) + +{ + struct net_bridge *br = netdev_priv(dev); + + return br_add_if(br, slave_dev); +} + +static int br_del_slave(struct net_device *dev, struct net_device *slave_dev) +{ + struct net_bridge *br = netdev_priv(dev); + + return br_del_if(br, slave_dev); +} + static const struct ethtool_ops br_ethtool_ops = { .get_drvinfo = br_getinfo, .get_link = ethtool_op_get_link, @@ -326,6 +341,8 @@ static const struct net_device_ops br_netdev_ops = { .ndo_netpoll_cleanup = br_netpoll_cleanup, .ndo_poll_controller = br_poll_controller, #endif + .ndo_add_slave = br_add_slave, + .ndo_del_slave = br_del_slave, }; static void br_dev_free(struct net_device *dev) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 2a6801d..dce8f00 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -148,6 +148,8 @@ static void del_nbp(struct net_bridge_port *p) netdev_rx_handler_unregister(dev); + netdev_set_master(dev, NULL); + br_multicast_del_port(p); kobject_uevent(&p->kobj, KOBJ_REMOVE); @@ -429,10 +431,14 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (br_netpoll_info(br) && ((err = br_netpoll_enable(p)))) goto err3; - err = netdev_rx_handler_register(dev, br_handle_frame, p); + err = netdev_set_master(dev, br->dev); if (err) goto err3; + err = netdev_rx_handler_register(dev, br_handle_frame, p); + if (err) + goto err4; + dev->priv_flags |= IFF_BRIDGE_PORT; 
dev_disable_lro(dev); @@ -455,6 +461,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) kobject_uevent(&p->kobj, KOBJ_ADD); return 0; + +err4: + netdev_set_master(dev, NULL); err3: sysfs_remove_link(br->ifobj, p->dev->name); err2: -- cgit v1.1 From 43629f8f5ea32a998d06d1bb41eefa0e821ff573 Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Mon, 14 Feb 2011 13:54:31 +0300 Subject: Bluetooth: bnep: fix buffer overflow Struct ca is copied from userspace. It is not checked whether the "device" field is NULL terminated. This potentially leads to BUG() inside of alloc_netdev_mqs() and/or information leak by creating a device with a name made of contents of kernel stack. Signed-off-by: Vasiliy Kulikov Signed-off-by: Gustavo F. Padovan --- net/bluetooth/bnep/sock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 2862f53..d935da7 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -88,6 +88,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long sockfd_put(nsock); return -EBADFD; } + ca.device[sizeof(ca.device)-1] = 0; err = bnep_add_connection(&ca, nsock); if (!err) { -- cgit v1.1 From c4c896e1471aec3b004a693c689f60be3b17ac86 Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Mon, 14 Feb 2011 13:54:26 +0300 Subject: Bluetooth: sco: fix information leak to userspace struct sco_conninfo has one padding byte in the end. Local variable cinfo of type sco_conninfo is copied to userspace with this uninizialized one byte, leading to old stack contents leak. Signed-off-by: Vasiliy Kulikov Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/sco.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 960c6d1..926ed39 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -703,6 +703,7 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, char __user break; } + memset(&cinfo, 0, sizeof(cinfo)); cinfo.hci_handle = sco_pi(sk)->conn->hcon->handle; memcpy(cinfo.dev_class, sco_pi(sk)->conn->hcon->dev_class, 3); -- cgit v1.1 From d846f71195d57b0bbb143382647c2c6638b04c5a Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Mon, 14 Feb 2011 16:49:23 +0100 Subject: bridge: netfilter: fix information leak Struct tmp is copied from userspace. It is not checked whether the "name" field is NULL terminated. This may lead to buffer overflow and passing contents of kernel stack as a module name to try_then_request_module() and, consequently, to modprobe commandline. It would be seen by all userspace processes. Signed-off-by: Vasiliy Kulikov Signed-off-by: Patrick McHardy --- net/bridge/netfilter/ebtables.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 5f1825d..893669c 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1107,6 +1107,8 @@ static int do_replace(struct net *net, const void __user *user, if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter)) return -ENOMEM; + tmp.name[sizeof(tmp.name) - 1] = 0; + countersize = COUNTER_OFFSET(tmp.nentries) * nr_cpu_ids; newinfo = vmalloc(sizeof(*newinfo) + countersize); if (!newinfo) -- cgit v1.1 From 20b7975e5aefc7fd08b7f582f3901b1669725cd0 Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Mon, 14 Feb 2011 16:54:33 +0100 Subject: Revert "netfilter: xt_connlimit: connlimit-above early loop termination" This reverts commit 44bd4de9c2270b22c3c898310102bc6be9ed2978. 
I have to revert the early loop termination in connlimit since it generates problems when an iptables statement does not use -m state --state NEW before the connlimit match extension. Signed-off-by: Stefan Berger Signed-off-by: Patrick McHardy --- net/netfilter/xt_connlimit.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 82ce7c5..e029c48 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -97,8 +97,7 @@ static int count_them(struct net *net, const struct nf_conntrack_tuple *tuple, const union nf_inet_addr *addr, const union nf_inet_addr *mask, - u_int8_t family, - unsigned int threshold) + u_int8_t family) { const struct nf_conntrack_tuple_hash *found; struct xt_connlimit_conn *conn; @@ -152,14 +151,9 @@ static int count_them(struct net *net, continue; } - if (same_source_net(addr, mask, &conn->tuple.src.u3, family)) { + if (same_source_net(addr, mask, &conn->tuple.src.u3, family)) /* same source network -> be counted! */ ++matches; - if (matches > threshold) { - nf_ct_put(found_ct); - break; - } - } nf_ct_put(found_ct); } @@ -213,8 +207,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) spin_lock_bh(&info->data->lock); connections = count_them(net, info->data, tuple_ptr, &addr, - &info->mask, par->family, - info->limit); + &info->mask, par->family); spin_unlock_bh(&info->data->lock); if (connections < 0) -- cgit v1.1 From a2361c8735e07322023aedc36e4938b35af31eb0 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 14 Feb 2011 17:28:55 +0100 Subject: netfilter: xt_conntrack: warn about use in raw table nfct happens to run after the raw table only. 
Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/netfilter/xt_conntrack.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 4ef1b63..2c0086a 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -272,6 +272,11 @@ static int conntrack_mt_check(const struct xt_mtchk_param *par) { int ret; + if (strcmp(par->table, "raw") == 0) { + pr_info("state is undetermined at the time of raw table\n"); + return -EINVAL; + } + ret = nf_ct_l3proto_try_module_get(par->family); if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", -- cgit v1.1 From ac7100ba93428a26cde8e47dfcfcfbfbfcce66de Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 14 Feb 2011 19:02:23 +0000 Subject: sch_mqprio: Always set num_tc to 0 in mqprio_destroy() All the cleanup code in mqprio_destroy() is currently conditional on priv->qdiscs being non-null, but that condition should only apply to the per-queue qdisc cleanup. We should always set the number of traffic classes back to 0 here. 
Signed-off-by: Ben Hutchings --- net/sched/sch_mqprio.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index effd4ee..ace37f9 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -29,18 +29,18 @@ static void mqprio_destroy(struct Qdisc *sch) struct mqprio_sched *priv = qdisc_priv(sch); unsigned int ntx; - if (!priv->qdiscs) - return; - - for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++) - qdisc_destroy(priv->qdiscs[ntx]); + if (priv->qdiscs) { + for (ntx = 0; + ntx < dev->num_tx_queues && priv->qdiscs[ntx]; + ntx++) + qdisc_destroy(priv->qdiscs[ntx]); + kfree(priv->qdiscs); + } if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc) dev->netdev_ops->ndo_setup_tc(dev, 0); else netdev_set_num_tc(dev, 0); - - kfree(priv->qdiscs); } static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt) -- cgit v1.1 From 31d409373cca3517a30540b51f55dcb1f5af0d49 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Feb 2011 11:23:04 -0800 Subject: ipv4: fix rcu lock imbalance in fib_select_default() Commit 0c838ff1ade7 (ipv4: Consolidate all default route selection implementations.) forgot to remove one rcu_read_unlock() from fib_select_default(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 146bd82..562f34c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1189,7 +1189,7 @@ void fib_select_default(struct fib_result *res) fib_result_assign(res, last_resort); tb->tb_default = last_idx; out: - rcu_read_unlock(); + return; } #ifdef CONFIG_IP_ROUTE_MULTIPATH -- cgit v1.1 From 642745184f82688eb3ef0cdfaa4ba632055be9af Mon Sep 17 00:00:00 2001 From: "Gustavo F. 
Padovan" Date: Mon, 7 Feb 2011 20:08:52 -0200 Subject: Bluetooth: Merge L2CAP and SCO modules into bluetooth.ko Actually doesn't make sense have these modules built separately. The L2CAP layer is needed by almost all Bluetooth protocols and profiles. There isn't any real use case without having L2CAP loaded. SCO is only essential for Audio transfers, but it is so small that we can have it loaded always in bluetooth.ko without problems. If you really doesn't want it you can disable SCO in the kernel config. Signed-off-by: Gustavo F. Padovan --- net/bluetooth/Kconfig | 10 ++-------- net/bluetooth/Makefile | 5 ++--- net/bluetooth/af_bluetooth.c | 32 ++++++++++++++++++++++++++++++-- net/bluetooth/l2cap_core.c | 16 ++-------------- net/bluetooth/sco.c | 16 ++-------------- 5 files changed, 38 insertions(+), 41 deletions(-) (limited to 'net') diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index e45eae6..c6f9c2f 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -32,7 +32,7 @@ menuconfig BT more information, see . config BT_L2CAP - tristate "L2CAP protocol support" + bool "L2CAP protocol support" depends on BT select CRC16 help @@ -40,19 +40,13 @@ config BT_L2CAP connection oriented and connection-less data transport. L2CAP support is required for most Bluetooth applications. - Say Y here to compile L2CAP support into the kernel or say M to - compile it as module (l2cap). - config BT_SCO - tristate "SCO links support" + bool "SCO links support" depends on BT help SCO link provides voice transport over Bluetooth. SCO support is required for voice applications like Headset and Audio. - Say Y here to compile SCO support into the kernel or say M to - compile it as module (sco). 
- source "net/bluetooth/rfcomm/Kconfig" source "net/bluetooth/bnep/Kconfig" diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index 339b429..f04fe9a 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -3,12 +3,11 @@ # obj-$(CONFIG_BT) += bluetooth.o -obj-$(CONFIG_BT_L2CAP) += l2cap.o -obj-$(CONFIG_BT_SCO) += sco.o obj-$(CONFIG_BT_RFCOMM) += rfcomm/ obj-$(CONFIG_BT_BNEP) += bnep/ obj-$(CONFIG_BT_CMTP) += cmtp/ obj-$(CONFIG_BT_HIDP) += hidp/ bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o hci_sock.o hci_sysfs.o lib.o -l2cap-y := l2cap_core.o l2cap_sock.o +bluetooth-$(CONFIG_BT_L2CAP) += l2cap_core.o l2cap_sock.o +bluetooth-$(CONFIG_BT_SCO) += sco.o diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 2abfe2f..c258027 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -40,7 +40,7 @@ #include -#define VERSION "2.15" +#define VERSION "2.16" /* Bluetooth sockets */ #define BT_MAX_PROTO 8 @@ -545,13 +545,41 @@ static int __init bt_init(void) BT_INFO("HCI device and connection manager initialized"); - hci_sock_init(); + err = hci_sock_init(); + if (err < 0) + goto error; + + err = l2cap_init(); + if (err < 0) { + hci_sock_cleanup(); + goto sock_err; + } + + err = sco_init(); + if (err < 0) { + l2cap_exit(); + goto sock_err; + } return 0; + +sock_err: + hci_sock_cleanup(); + +error: + sock_unregister(PF_BLUETOOTH); + bt_sysfs_cleanup(); + + return err; } static void __exit bt_exit(void) { + + sco_exit(); + + l2cap_exit(); + hci_sock_cleanup(); sock_unregister(PF_BLUETOOTH); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index ba7f9da..6f054d9 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -55,8 +55,6 @@ #include #include -#define VERSION "2.15" - int disable_ertm; static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN; @@ -3806,7 +3804,7 @@ static struct hci_proto l2cap_hci_proto = { .recv_acldata = l2cap_recv_acldata }; 
-static int __init l2cap_init(void) +int __init l2cap_init(void) { int err; @@ -3834,7 +3832,6 @@ static int __init l2cap_init(void) BT_ERR("Failed to create L2CAP debug file"); } - BT_INFO("L2CAP ver %s", VERSION); BT_INFO("L2CAP socket layer initialized"); return 0; @@ -3845,7 +3842,7 @@ error: return err; } -static void __exit l2cap_exit(void) +void l2cap_exit(void) { debugfs_remove(l2cap_debugfs); @@ -3866,14 +3863,5 @@ void l2cap_load(void) } EXPORT_SYMBOL(l2cap_load); -module_init(l2cap_init); -module_exit(l2cap_exit); - module_param(disable_ertm, bool, 0644); MODULE_PARM_DESC(disable_ertm, "Disable enhanced retransmission mode"); - -MODULE_AUTHOR("Marcel Holtmann "); -MODULE_DESCRIPTION("Bluetooth L2CAP ver " VERSION); -MODULE_VERSION(VERSION); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("bt-proto-0"); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 926ed39..c9348dd 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -50,8 +50,6 @@ #include #include -#define VERSION "0.6" - static int disable_esco; static const struct proto_ops sco_sock_ops; @@ -1024,7 +1022,7 @@ static struct hci_proto sco_hci_proto = { .recv_scodata = sco_recv_scodata }; -static int __init sco_init(void) +int __init sco_init(void) { int err; @@ -1052,7 +1050,6 @@ static int __init sco_init(void) BT_ERR("Failed to create SCO debug file"); } - BT_INFO("SCO (Voice Link) ver %s", VERSION); BT_INFO("SCO socket layer initialized"); return 0; @@ -1062,7 +1059,7 @@ error: return err; } -static void __exit sco_exit(void) +void __exit sco_exit(void) { debugfs_remove(sco_debugfs); @@ -1075,14 +1072,5 @@ static void __exit sco_exit(void) proto_unregister(&sco_proto); } -module_init(sco_init); -module_exit(sco_exit); - module_param(disable_esco, bool, 0644); MODULE_PARM_DESC(disable_esco, "Disable eSCO connection creation"); - -MODULE_AUTHOR("Marcel Holtmann "); -MODULE_DESCRIPTION("Bluetooth SCO ver " VERSION); -MODULE_VERSION(VERSION); -MODULE_LICENSE("GPL"); 
-MODULE_ALIAS("bt-proto-2"); -- cgit v1.1 From 0c2530cec5672f38f8ab834ee53d17175d3bca95 Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Sat, 12 Feb 2011 14:17:15 +0100 Subject: mac80211: Remove superfluous if clause ieee80211_rx_h_check returned RX_DROP_MONITOR in case the if statement in question was true but the same return value is also used directly after the if clause. Hence, we can just drop the whole if clause and as such simplify the code. Signed-off-by: Helmut Schaa Signed-off-by: John W. Linville --- net/mac80211/rx.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 045b2fe..f502634 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -832,18 +832,8 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) ieee80211_is_pspoll(hdr->frame_control)) && rx->sdata->vif.type != NL80211_IFTYPE_ADHOC && rx->sdata->vif.type != NL80211_IFTYPE_WDS && - (!rx->sta || !test_sta_flags(rx->sta, WLAN_STA_ASSOC)))) { - if ((!ieee80211_has_fromds(hdr->frame_control) && - !ieee80211_has_tods(hdr->frame_control) && - ieee80211_is_data(hdr->frame_control)) || - !(status->rx_flags & IEEE80211_RX_RA_MATCH)) { - /* Drop IBSS frames and frames for other hosts - * silently. */ - return RX_DROP_MONITOR; - } - + (!rx->sta || !test_sta_flags(rx->sta, WLAN_STA_ASSOC)))) return RX_DROP_MONITOR; - } return RX_CONTINUE; } -- cgit v1.1 From c269a20393500e84e8cbae23ca6d65e1107433c4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 14 Feb 2011 12:20:22 +0100 Subject: mac80211: reply to directed probes in IBSS WFA certification and the WMM spec require that we always reply to unicast probe requests, so do that. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/ibss.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 775fb63..a42aa61 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -664,12 +664,13 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) } static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - size_t len) + struct sk_buff *req) { + struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(req); + struct ieee80211_mgmt *mgmt = (void *)req->data; struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; - int tx_last_beacon; + int tx_last_beacon, len = req->len; struct sk_buff *skb; struct ieee80211_mgmt *resp; u8 *pos, *end; @@ -689,7 +690,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, mgmt->bssid, tx_last_beacon); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ - if (!tx_last_beacon) + if (!tx_last_beacon && !(rx_status->rx_flags & IEEE80211_RX_RA_MATCH)) return; if (memcmp(mgmt->bssid, ifibss->bssid, ETH_ALEN) != 0 && @@ -786,7 +787,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, switch (fc & IEEE80211_FCTL_STYPE) { case IEEE80211_STYPE_PROBE_REQ: - ieee80211_rx_mgmt_probe_req(sdata, mgmt, skb->len); + ieee80211_rx_mgmt_probe_req(sdata, skb); break; case IEEE80211_STYPE_PROBE_RESP: ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len, -- cgit v1.1 From c5d8b24ad0a9a45e163a6769b4eb7e7f1fb9aa7f Mon Sep 17 00:00:00 2001 From: Bernard Pidoux Date: Mon, 14 Feb 2011 13:31:09 -0800 Subject: ROSE: rose AX25 packet routing improvement FPAC AX25 packet application is using Linux kernel ROSE routing skills in order to connect or send packets to remote stations knowing their ROSE address via a network of interconnected nodes. 
Each FPAC node has a ROSE routing table that the Linux ROSE module consults each time a ROSE frame is relayed by the node or when a connect request to a neighbor node is received. A previous patch improved the system time response by looking at already established routes each time the system was looking for a route to relay a frame. If a neighbor node routing the destination address was already connected, then the frame would be sent through it. If not, a connection request would be issued. The present patch extends the same routing capability to a connect request asked by a user locally connected into an FPAC node. Without this patch, a connect request was not well handled unless it was directed to an immediate connected neighbor of the local node. Implemented at a number of ROSE FPAC node stations, the present patch dramatically improved FPAC ROSE routing response time and efficiency. Signed-off-by: Bernard Pidoux Signed-off-by: David S. Miller --- net/rose/rose_route.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index b4fdaac..88a77e9 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -674,29 +674,34 @@ struct rose_route *rose_route_free_lci(unsigned int lci, struct rose_neigh *neig * Find a neighbour or a route given a ROSE address.
*/ struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause, - unsigned char *diagnostic, int new) + unsigned char *diagnostic, int route_frame) { struct rose_neigh *res = NULL; struct rose_node *node; int failed = 0; int i; - if (!new) spin_lock_bh(&rose_node_list_lock); + if (!route_frame) spin_lock_bh(&rose_node_list_lock); for (node = rose_node_list; node != NULL; node = node->next) { if (rosecmpm(addr, &node->address, node->mask) == 0) { for (i = 0; i < node->count; i++) { - if (new) { - if (node->neighbour[i]->restarted) { - res = node->neighbour[i]; - goto out; - } + if (node->neighbour[i]->restarted) { + res = node->neighbour[i]; + goto out; } - else { + } + } + } + if (!route_frame) { /* connect request */ + for (node = rose_node_list; node != NULL; node = node->next) { + if (rosecmpm(addr, &node->address, node->mask) == 0) { + for (i = 0; i < node->count; i++) { if (!rose_ftimer_running(node->neighbour[i])) { res = node->neighbour[i]; + failed = 0; goto out; - } else - failed = 1; + } + failed = 1; } } } @@ -711,8 +716,7 @@ struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause, } out: - if (!new) spin_unlock_bh(&rose_node_list_lock); - + if (!route_frame) spin_unlock_bh(&rose_node_list_lock); return res; } -- cgit v1.1 From 68aa3fd551e9d54d98794852714dc1edbb21df77 Mon Sep 17 00:00:00 2001 From: Bernard Pidoux Date: Mon, 14 Feb 2011 13:33:49 -0800 Subject: ROSE: AX25: finding routes simplification With previous patch, rose_get_neigh() routine investigates the full list of neighbor nodes until it finds or not an already connected node whether it is called locally or through a level 3 transit frame. If no routes are opened through an adjacent connected node then a classical connect request is attempted. Then there is no more reason for an extra loop such as the one removed by this patch. Signed-off-by: Bernard Pidoux Signed-off-by: David S. 
Miller --- net/rose/af_rose.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'net') diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index d952e7e..5ee0c62 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -803,7 +803,6 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le rose_insert_socket(sk); /* Finish the bind */ } -rose_try_next_neigh: rose->dest_addr = addr->srose_addr; rose->dest_call = addr->srose_call; rose->rand = ((long)rose & 0xFFFF) + rose->lci; @@ -865,12 +864,6 @@ rose_try_next_neigh: } if (sk->sk_state != TCP_ESTABLISHED) { - /* Try next neighbour */ - rose->neighbour = rose_get_neigh(&addr->srose_addr, &cause, &diagnostic, 0); - if (rose->neighbour) - goto rose_try_next_neigh; - - /* No more neighbours */ sock->state = SS_UNCONNECTED; err = sock_error(sk); /* Always set at this point */ goto out_release; -- cgit v1.1 From 8d689218568174955129d0f0e9e4370a391b3609 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Mon, 14 Feb 2011 23:38:18 +0100 Subject: batman-adv: Remove two duplicate includes. Remove duplicate inclusion of "send.h" and "routing.h" from net/batman-adv/soft-interface.c Signed-off-by: Jesper Juhl Signed-off-by: Sven Eckelmann --- net/batman-adv/soft-interface.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index bd088f8..7e37077 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -29,14 +29,12 @@ #include "hash.h" #include "gateway_common.h" #include "gateway_client.h" -#include "send.h" #include "bat_sysfs.h" #include #include #include #include #include "unicast.h" -#include "routing.h" static int bat_get_settings(struct net_device *dev, struct ethtool_cmd *cmd); -- cgit v1.1 From 2c8cec5c10bced2408082a6656170e74ac17231c Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Wed, 9 Feb 2011 20:42:07 -0800 Subject: ipv4: Cache learned PMTU information in inetpeer. The general idea is that if we learn new PMTU information, we bump the peer genid. This triggers the dst_ops->check() code to validate and if necessary propagate the new PMTU value into the metrics. Learned PMTU information self-expires. This means that it is not necessary to kill a cached route entry just because the PMTU information is too old. As a consequence: 1) When the path appears unreachable (dst_ops->link_failure or dst_ops->negative_advice) we unwind the PMTU state if it is out of date, instead of killing the cached route. A redirected route will still be invalidated in these situations. 2) rt_check_expire(), rt_worker_func(), et al. are no longer necessary at all. Signed-off-by: David S. Miller --- net/ipv4/route.c | 260 ++++++++++++++++++------------------------------------- 1 file changed, 86 insertions(+), 174 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 0979e03..11faf14 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -131,9 +131,6 @@ static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; static int rt_chain_length_max __read_mostly = 20; -static struct delayed_work expires_work; -static unsigned long expires_ljiffies; - /* * Interface to generic destination cache. 
*/ @@ -668,7 +665,7 @@ static inline int rt_fast_clean(struct rtable *rth) static inline int rt_valuable(struct rtable *rth) { return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || - rth->dst.expires; + (rth->peer && rth->peer->pmtu_expires); } static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) @@ -679,13 +676,7 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t if (atomic_read(&rth->dst.__refcnt)) goto out; - ret = 1; - if (rth->dst.expires && - time_after_eq(jiffies, rth->dst.expires)) - goto out; - age = jiffies - rth->dst.lastuse; - ret = 0; if ((age <= tmo1 && !rt_fast_clean(rth)) || (age <= tmo2 && rt_valuable(rth))) goto out; @@ -829,97 +820,6 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth) return ONE; } -static void rt_check_expire(void) -{ - static unsigned int rover; - unsigned int i = rover, goal; - struct rtable *rth; - struct rtable __rcu **rthp; - unsigned long samples = 0; - unsigned long sum = 0, sum2 = 0; - unsigned long delta; - u64 mult; - - delta = jiffies - expires_ljiffies; - expires_ljiffies = jiffies; - mult = ((u64)delta) << rt_hash_log; - if (ip_rt_gc_timeout > 1) - do_div(mult, ip_rt_gc_timeout); - goal = (unsigned int)mult; - if (goal > rt_hash_mask) - goal = rt_hash_mask + 1; - for (; goal > 0; goal--) { - unsigned long tmo = ip_rt_gc_timeout; - unsigned long length; - - i = (i + 1) & rt_hash_mask; - rthp = &rt_hash_table[i].chain; - - if (need_resched()) - cond_resched(); - - samples++; - - if (rcu_dereference_raw(*rthp) == NULL) - continue; - length = 0; - spin_lock_bh(rt_hash_lock_addr(i)); - while ((rth = rcu_dereference_protected(*rthp, - lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) { - prefetch(rth->dst.rt_next); - if (rt_is_expired(rth)) { - *rthp = rth->dst.rt_next; - rt_free(rth); - continue; - } - if (rth->dst.expires) { - /* Entry is expired even if it is in use */ - if (time_before_eq(jiffies, rth->dst.expires)) { 
-nofree: - tmo >>= 1; - rthp = &rth->dst.rt_next; - /* - * We only count entries on - * a chain with equal hash inputs once - * so that entries for different QOS - * levels, and other non-hash input - * attributes don't unfairly skew - * the length computation - */ - length += has_noalias(rt_hash_table[i].chain, rth); - continue; - } - } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) - goto nofree; - - /* Cleanup aged off entries. */ - *rthp = rth->dst.rt_next; - rt_free(rth); - } - spin_unlock_bh(rt_hash_lock_addr(i)); - sum += length; - sum2 += length*length; - } - if (samples) { - unsigned long avg = sum / samples; - unsigned long sd = int_sqrt(sum2 / samples - avg*avg); - rt_chain_length_max = max_t(unsigned long, - ip_rt_gc_elasticity, - (avg + 4*sd) >> FRACT_BITS); - } - rover = i; -} - -/* - * rt_worker_func() is run in process context. - * we call rt_check_expire() to scan part of the hash table - */ -static void rt_worker_func(struct work_struct *work) -{ - rt_check_expire(); - schedule_delayed_work(&expires_work, ip_rt_gc_interval); -} - /* * Pertubation of rt_genid by a small quantity [1..256] * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() @@ -1535,9 +1435,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) if (dst->obsolete > 0) { ip_rt_put(rt); ret = NULL; - } else if ((rt->rt_flags & RTCF_REDIRECTED) || - (rt->dst.expires && - time_after_eq(jiffies, rt->dst.expires))) { + } else if (rt->rt_flags & RTCF_REDIRECTED) { unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, rt->fl.oif, rt_genid(dev_net(dst->dev))); @@ -1547,6 +1445,14 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) #endif rt_del(hash, rt); ret = NULL; + } else if (rt->peer && + rt->peer->pmtu_expires && + time_after_eq(jiffies, rt->peer->pmtu_expires)) { + unsigned long orig = rt->peer->pmtu_expires; + + if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig) + dst_metric_set(dst, RTAX_MTU, + rt->peer->pmtu_orig); } 
} return ret; @@ -1697,80 +1603,78 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, unsigned short new_mtu, struct net_device *dev) { - int i, k; unsigned short old_mtu = ntohs(iph->tot_len); - struct rtable *rth; - int ikeys[2] = { dev->ifindex, 0 }; - __be32 skeys[2] = { iph->saddr, 0, }; - __be32 daddr = iph->daddr; unsigned short est_mtu = 0; + struct inet_peer *peer; - for (k = 0; k < 2; k++) { - for (i = 0; i < 2; i++) { - unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], - rt_genid(net)); - - rcu_read_lock(); - for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; - rth = rcu_dereference(rth->dst.rt_next)) { - unsigned short mtu = new_mtu; + peer = inet_getpeer_v4(iph->daddr, 1); + if (peer) { + unsigned short mtu = new_mtu; - if (rth->fl.fl4_dst != daddr || - rth->fl.fl4_src != skeys[i] || - rth->rt_dst != daddr || - rth->rt_src != iph->saddr || - rth->fl.oif != ikeys[k] || - rt_is_input_route(rth) || - dst_metric_locked(&rth->dst, RTAX_MTU) || - !net_eq(dev_net(rth->dst.dev), net) || - rt_is_expired(rth)) - continue; + if (new_mtu < 68 || new_mtu >= old_mtu) { + /* BSD 4.2 derived systems incorrectly adjust + * tot_len by the IP header length, and report + * a zero MTU in the ICMP message. 
+ */ + if (mtu == 0 && + old_mtu >= 68 + (iph->ihl << 2)) + old_mtu -= iph->ihl << 2; + mtu = guess_mtu(old_mtu); + } - if (new_mtu < 68 || new_mtu >= old_mtu) { + if (mtu < ip_rt_min_pmtu) + mtu = ip_rt_min_pmtu; + if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { + est_mtu = mtu; + peer->pmtu_learned = mtu; + peer->pmtu_expires = jiffies + ip_rt_mtu_expires; + } - /* BSD 4.2 compatibility hack :-( */ - if (mtu == 0 && - old_mtu >= dst_mtu(&rth->dst) && - old_mtu >= 68 + (iph->ihl << 2)) - old_mtu -= iph->ihl << 2; + inet_putpeer(peer); - mtu = guess_mtu(old_mtu); - } - if (mtu <= dst_mtu(&rth->dst)) { - if (mtu < dst_mtu(&rth->dst)) { - dst_confirm(&rth->dst); - if (mtu < ip_rt_min_pmtu) { - u32 lock = dst_metric(&rth->dst, - RTAX_LOCK); - mtu = ip_rt_min_pmtu; - lock |= (1 << RTAX_MTU); - dst_metric_set(&rth->dst, RTAX_LOCK, - lock); - } - dst_metric_set(&rth->dst, RTAX_MTU, mtu); - dst_set_expires(&rth->dst, - ip_rt_mtu_expires); - } - est_mtu = mtu; - } - } - rcu_read_unlock(); - } + atomic_inc(&__rt_peer_genid); } return est_mtu ? 
: new_mtu; } +static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer) +{ + unsigned long expires = peer->pmtu_expires; + + if (time_before(expires, jiffies)) { + u32 orig_dst_mtu = dst_mtu(dst); + if (peer->pmtu_learned < orig_dst_mtu) { + if (!peer->pmtu_orig) + peer->pmtu_orig = dst_metric_raw(dst, RTAX_MTU); + dst_metric_set(dst, RTAX_MTU, peer->pmtu_learned); + } + } else if (cmpxchg(&peer->pmtu_expires, expires, 0) == expires) + dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig); +} + static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) { - if (dst_mtu(dst) > mtu && mtu >= 68 && - !(dst_metric_locked(dst, RTAX_MTU))) { - if (mtu < ip_rt_min_pmtu) { - u32 lock = dst_metric(dst, RTAX_LOCK); + struct rtable *rt = (struct rtable *) dst; + struct inet_peer *peer; + + dst_confirm(dst); + + if (!rt->peer) + rt_bind_peer(rt, 1); + peer = rt->peer; + if (peer) { + if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; - dst_metric_set(dst, RTAX_LOCK, lock | (1 << RTAX_MTU)); + if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { + peer->pmtu_learned = mtu; + peer->pmtu_expires = jiffies + ip_rt_mtu_expires; + + atomic_inc(&__rt_peer_genid); + rt->rt_peer_genid = rt_peer_genid(); + + check_peer_pmtu(dst, peer); } - dst_metric_set(dst, RTAX_MTU, mtu); - dst_set_expires(dst, ip_rt_mtu_expires); + inet_putpeer(peer); } } @@ -1781,9 +1685,15 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) if (rt_is_expired(rt)) return NULL; if (rt->rt_peer_genid != rt_peer_genid()) { + struct inet_peer *peer; + if (!rt->peer) rt_bind_peer(rt, 0); + peer = rt->peer; + if (peer && peer->pmtu_expires) + check_peer_pmtu(dst, peer); + rt->rt_peer_genid = rt_peer_genid(); } return dst; @@ -1812,8 +1722,14 @@ static void ipv4_link_failure(struct sk_buff *skb) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); rt = skb_rtable(skb); - if (rt) - dst_set_expires(&rt->dst, 0); + if (rt && + rt->peer && + rt->peer->pmtu_expires) { + unsigned long 
orig = rt->peer->pmtu_expires; + + if (cmpxchg(&rt->peer->pmtu_expires, orig, 0) == orig) + dst_metric_set(&rt->dst, RTAX_MTU, rt->peer->pmtu_orig); + } } static int ip_rt_bug(struct sk_buff *skb) @@ -1911,6 +1827,9 @@ static void rt_init_metrics(struct rtable *rt, struct fib_info *fi) memcpy(peer->metrics, fi->fib_metrics, sizeof(u32) * RTAX_MAX); dst_init_metrics(&rt->dst, peer->metrics, false); + + if (peer->pmtu_expires) + check_peer_pmtu(&rt->dst, peer); } else { if (fi->fib_metrics != (u32 *) dst_default_metrics) { rt->fi = fi; @@ -2961,7 +2880,8 @@ static int rt_fill_info(struct net *net, NLA_PUT_BE32(skb, RTA_MARK, rt->fl.mark); error = rt->dst.error; - expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; + expires = (rt->peer && rt->peer->pmtu_expires) ? + rt->peer->pmtu_expires - jiffies : 0; if (rt->peer) { inet_peer_refcheck(rt->peer); id = atomic_read(&rt->peer->ip_id_count) & 0xffff; @@ -3418,14 +3338,6 @@ int __init ip_rt_init(void) devinet_init(); ip_fib_init(); - /* All the timers, started at system startup tend - to synchronize. Perturb it a bit. - */ - INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func); - expires_ljiffies = jiffies; - schedule_delayed_work(&expires_work, - net_random() % ip_rt_gc_interval + ip_rt_gc_interval); - if (ip_rt_proc_init()) printk(KERN_ERR "Unable to create route proc files\n"); #ifdef CONFIG_XFRM -- cgit v1.1 From f39925dbde7788cfb96419c0f092b086aa325c0f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 9 Feb 2011 22:00:16 -0800 Subject: ipv4: Cache learned redirect information in inetpeer. Note that we do not generate the redirect netevent any longer, because we don't create a new cached route. Instead, once the new neighbour is bound to the cached route, we emit a neigh update event instead. Signed-off-by: David S. 
Miller --- net/ipv4/route.c | 136 +++++++++++++++++-------------------------------------- 1 file changed, 42 insertions(+), 94 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 11faf14..756f544 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1294,13 +1294,8 @@ static void rt_del(unsigned hash, struct rtable *rt) void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, __be32 saddr, struct net_device *dev) { - int i, k; struct in_device *in_dev = __in_dev_get_rcu(dev); - struct rtable *rth; - struct rtable __rcu **rthp; - __be32 skeys[2] = { saddr, 0 }; - int ikeys[2] = { dev->ifindex, 0 }; - struct netevent_redirect netevent; + struct inet_peer *peer; struct net *net; if (!in_dev) @@ -1312,9 +1307,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, ipv4_is_zeronet(new_gw)) goto reject_redirect; - if (!rt_caching(net)) - goto reject_redirect; - if (!IN_DEV_SHARED_MEDIA(in_dev)) { if (!inet_addr_onlink(in_dev, new_gw, old_gw)) goto reject_redirect; @@ -1325,93 +1317,13 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, goto reject_redirect; } - for (i = 0; i < 2; i++) { - for (k = 0; k < 2; k++) { - unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], - rt_genid(net)); - - rthp = &rt_hash_table[hash].chain; - - while ((rth = rcu_dereference(*rthp)) != NULL) { - struct rtable *rt; - - if (rth->fl.fl4_dst != daddr || - rth->fl.fl4_src != skeys[i] || - rth->fl.oif != ikeys[k] || - rt_is_input_route(rth) || - rt_is_expired(rth) || - !net_eq(dev_net(rth->dst.dev), net)) { - rthp = &rth->dst.rt_next; - continue; - } - - if (rth->rt_dst != daddr || - rth->rt_src != saddr || - rth->dst.error || - rth->rt_gateway != old_gw || - rth->dst.dev != dev) - break; - - dst_hold(&rth->dst); - - rt = dst_alloc(&ipv4_dst_ops); - if (rt == NULL) { - ip_rt_put(rth); - return; - } - - /* Copy all the information. 
*/ - *rt = *rth; - rt->dst.__use = 1; - atomic_set(&rt->dst.__refcnt, 1); - rt->dst.child = NULL; - if (rt->dst.dev) - dev_hold(rt->dst.dev); - rt->dst.obsolete = -1; - rt->dst.lastuse = jiffies; - rt->dst.path = &rt->dst; - rt->dst.neighbour = NULL; - rt->dst.hh = NULL; -#ifdef CONFIG_XFRM - rt->dst.xfrm = NULL; -#endif - rt->rt_genid = rt_genid(net); - rt->rt_flags |= RTCF_REDIRECTED; - - /* Gateway is different ... */ - rt->rt_gateway = new_gw; - - /* Redirect received -> path was valid */ - dst_confirm(&rth->dst); - - if (rt->peer) - atomic_inc(&rt->peer->refcnt); - if (rt->fi) - atomic_inc(&rt->fi->fib_clntref); - - if (arp_bind_neighbour(&rt->dst) || - !(rt->dst.neighbour->nud_state & - NUD_VALID)) { - if (rt->dst.neighbour) - neigh_event_send(rt->dst.neighbour, NULL); - ip_rt_put(rth); - rt_drop(rt); - goto do_next; - } + peer = inet_getpeer_v4(daddr, 1); + if (peer) { + peer->redirect_learned.a4 = new_gw; - netevent.old = &rth->dst; - netevent.new = &rt->dst; - call_netevent_notifiers(NETEVENT_REDIRECT, - &netevent); + inet_putpeer(peer); - rt_del(hash, rth); - if (!rt_intern_hash(hash, rt, &rt, NULL, rt->fl.oif)) - ip_rt_put(rt); - goto do_next; - } - do_next: - ; - } + atomic_inc(&__rt_peer_genid); } return; @@ -1678,6 +1590,31 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) } } +static int check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) +{ + struct rtable *rt = (struct rtable *) dst; + __be32 orig_gw = rt->rt_gateway; + + dst_confirm(&rt->dst); + + neigh_release(rt->dst.neighbour); + rt->dst.neighbour = NULL; + + rt->rt_gateway = peer->redirect_learned.a4; + if (arp_bind_neighbour(&rt->dst) || + !(rt->dst.neighbour->nud_state & NUD_VALID)) { + if (rt->dst.neighbour) + neigh_event_send(rt->dst.neighbour, NULL); + rt->rt_gateway = orig_gw; + return -EAGAIN; + } else { + rt->rt_flags |= RTCF_REDIRECTED; + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, + rt->dst.neighbour); + } + return 0; +} + static struct dst_entry 
*ipv4_dst_check(struct dst_entry *dst, u32 cookie) { struct rtable *rt = (struct rtable *) dst; @@ -1694,6 +1631,12 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) if (peer && peer->pmtu_expires) check_peer_pmtu(dst, peer); + if (peer && peer->redirect_learned.a4 && + peer->redirect_learned.a4 != rt->rt_gateway) { + if (check_peer_redir(dst, peer)) + return NULL; + } + rt->rt_peer_genid = rt_peer_genid(); } return dst; @@ -1830,6 +1773,11 @@ static void rt_init_metrics(struct rtable *rt, struct fib_info *fi) if (peer->pmtu_expires) check_peer_pmtu(&rt->dst, peer); + if (peer->redirect_learned.a4 && + peer->redirect_learned.a4 != rt->rt_gateway) { + rt->rt_gateway = peer->redirect_learned.a4; + rt->rt_flags |= RTCF_REDIRECTED; + } } else { if (fi->fib_metrics != (u32 *) dst_default_metrics) { rt->fi = fi; -- cgit v1.1 From c531a12ae63b6438a7859994aca23859f5706010 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Mon, 7 Feb 2011 20:19:30 -0200 Subject: Bluetooth: remove l2cap_load() hack l2cap_load() was added to trigger l2cap.ko module loading from the RFCOMM and BNEP modules. Now that L2CAP module is gone, we don't need it anymore. Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/bnep/core.c | 2 -- net/bluetooth/cmtp/core.c | 2 -- net/bluetooth/hidp/core.c | 2 -- net/bluetooth/l2cap_core.c | 8 -------- net/bluetooth/rfcomm/core.c | 2 -- 5 files changed, 16 deletions(-) (limited to 'net') diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 5868597..03d4d12 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -708,8 +708,6 @@ static int __init bnep_init(void) { char flt[50] = ""; - l2cap_load(); - #ifdef CONFIG_BT_BNEP_PROTO_FILTER strcat(flt, "protocol "); #endif diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 2cee71a..964ea91 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -469,8 +469,6 @@ int cmtp_get_conninfo(struct cmtp_conninfo *ci) static int __init cmtp_init(void) { - l2cap_load(); - BT_INFO("CMTP (CAPI Emulation) ver %s", VERSION); cmtp_init_sockets(); diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index e0de929..2429ca2 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -1019,8 +1019,6 @@ static int __init hidp_init(void) { int ret; - l2cap_load(); - BT_INFO("HIDP (Human Interface Emulation) ver %s", VERSION); ret = hid_register_driver(&hidp_driver); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 6f054d9..bd88641 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3855,13 +3855,5 @@ void l2cap_exit(void) l2cap_cleanup_sockets(); } -void l2cap_load(void) -{ - /* Dummy function to trigger automatic L2CAP module loading by - * other modules that use L2CAP sockets but don't use any other - * symbols from it. 
*/ -} -EXPORT_SYMBOL(l2cap_load); - module_param(disable_ertm, bool, 0644); MODULE_PARM_DESC(disable_ertm, "Disable enhanced retransmission mode"); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 6b83776..c997393 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -2154,8 +2154,6 @@ static int __init rfcomm_init(void) { int err; - l2cap_load(); - hci_register_cb(&rfcomm_cb); rfcomm_thread = kthread_run(rfcomm_run, NULL, "krfcommd"); -- cgit v1.1 From 903d343e202e51059e7d20524010ef54a6087aed Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Thu, 10 Feb 2011 14:16:06 -0200 Subject: Bluetooth: Add L2CAP mode to debugfs output Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index bd88641..a72d6e4 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3766,12 +3766,13 @@ static int l2cap_debugfs_show(struct seq_file *f, void *p) sk_for_each(sk, node, &l2cap_sk_list.head) { struct l2cap_pinfo *pi = l2cap_pi(sk); - seq_printf(f, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d %d\n", + seq_printf(f, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d %d %d\n", batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), sk->sk_state, __le16_to_cpu(pi->psm), pi->scid, pi->dcid, - pi->imtu, pi->omtu, pi->sec_level); + pi->imtu, pi->omtu, pi->sec_level, + pi->mode); } read_unlock_bh(&l2cap_sk_list.lock); -- cgit v1.1 From 5c56580b74e57e56f30e3c5bbc9d7ab487858497 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 15 Feb 2011 19:39:21 +0000 Subject: net: Adjust TX queue kobjects if number of queues changes during unregister If the root qdisc for a net device is mqprio, and the driver's ndo_setup_tc() operation dynamically adds and removes TX queues, netif_set_real_num_tx_queues() will be called during device unregistration to remove the extra TX queues when the qdisc
is destroyed. Currently this causes the corresponding kobjects to be leaked, and the device's reference count never drops to 0. Signed-off-by: Ben Hutchings --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 6392ea0..30c71f9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1648,7 +1648,8 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) if (txq < 1 || txq > dev->num_tx_queues) return -EINVAL; - if (dev->reg_state == NETREG_REGISTERED) { + if (dev->reg_state == NETREG_REGISTERED || + dev->reg_state == NETREG_UNREGISTERING) { ASSERT_RTNL(); rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, -- cgit v1.1 From 69a19ee60d5d5adc0addbdffd254f83b60660a07 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 15 Feb 2011 20:32:04 +0000 Subject: net: RPS: Make hardware-accelerated RFS conditional on NETIF_F_NTUPLE For testing and debugging purposes it is useful to be able to disable hardware acceleration of RFS without disabling RFS altogether. Since this is a similar feature to 'n-tuple' flow steering through the ethtool API, test the same feature flag that controls that. Signed-off-by: Ben Hutchings --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 30c71f9..54aaca6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2607,7 +2607,8 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, int rc; /* Should we steer this flow to a different hardware queue? 
*/ - if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap) + if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap || + !(dev->features & NETIF_F_NTUPLE)) goto out; rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu); if (rxq_index == skb_get_rx_queue(skb)) -- cgit v1.1 From 8248779b1878f17cce2bb809831f4f2a252bdb77 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Feb 2011 21:59:37 +0100 Subject: netfilter: nfnetlink_log: remove unused parameter Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- net/netfilter/nfnetlink_log.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 91592da..985e9b7 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -376,7 +376,6 @@ __build_packet_message(struct nfulnl_instance *inst, unsigned int hooknum, const struct net_device *indev, const struct net_device *outdev, - const struct nf_loginfo *li, const char *prefix, unsigned int plen) { struct nfulnl_msg_packet_hdr pmsg; @@ -652,7 +651,7 @@ nfulnl_log_packet(u_int8_t pf, inst->qlen++; __build_packet_message(inst, skb, data_len, pf, - hooknum, in, out, li, prefix, plen); + hooknum, in, out, prefix, plen); if (inst->qlen >= qthreshold) __nfulnl_flush(inst); -- cgit v1.1 From 16a7fd323f93eab88df79fc647575ae9789037c2 Mon Sep 17 00:00:00 2001 From: Tinggong Wang Date: Wed, 9 Feb 2011 02:21:59 +0200 Subject: ipvs: fix timer in get_curr_sync_buff Fix get_curr_sync_buff to keep buffer for 2 seconds as intended, not just for the current jiffie. By this way we will sync more connection structures with single packet. 
Signed-off-by: Tinggong Wang Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_sync.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index d1b7298..fecf24d 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -374,8 +374,8 @@ get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time) struct ip_vs_sync_buff *sb; spin_lock_bh(&ipvs->sync_buff_lock); - if (ipvs->sync_buff && (time == 0 || - time_before(jiffies - ipvs->sync_buff->firstuse, time))) { + if (ipvs->sync_buff && + time_after_eq(jiffies - ipvs->sync_buff->firstuse, time)) { sb = ipvs->sync_buff; ipvs->sync_buff = NULL; } else -- cgit v1.1 From 6cb90db502c5f276c8d6256762cc3acde4d3bd9d Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 9 Feb 2011 02:26:38 +0200 Subject: ipvs: remove extra lookups for ICMP packets Remove code that should not be called anymore. Now when ip_vs_out handles replies for local clients at LOCAL_IN hook we do not need to call conn_out_get and handle_response_icmp from ip_vs_in_icmp* because such lookups were already performed for the ICMP packet and no connection was found. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 28 +++------------------------- 1 file changed, 3 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 4d06617..2d1f932 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -729,7 +729,7 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp, #endif /* Handle relevant response ICMP messages - forward to the right - * destination host. Used for NAT and local client. + * destination host. 
*/ static int handle_response_icmp(int af, struct sk_buff *skb, union nf_inet_addr *snet, @@ -979,7 +979,6 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len) } /* Handle response packets: rewrite addresses and send away... - * Used for NAT and local client. */ static unsigned int handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, @@ -1280,7 +1279,6 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; unsigned int offset, ihl, verdict; - union nf_inet_addr snet; *related = 1; @@ -1339,17 +1337,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) ip_vs_fill_iphdr(AF_INET, cih, &ciph); /* The embedded headers contain source and dest in reverse order */ cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1); - if (!cp) { - /* The packet could also belong to a local client */ - cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1); - if (cp) { - snet.ip = iph->saddr; - return handle_response_icmp(AF_INET, skb, &snet, - cih->protocol, cp, pp, - offset, ihl); - } + if (!cp) return NF_ACCEPT; - } verdict = NF_DROP; @@ -1395,7 +1384,6 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; unsigned int offset, verdict; - union nf_inet_addr snet; struct rt6_info *rt; *related = 1; @@ -1455,18 +1443,8 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) ip_vs_fill_iphdr(AF_INET6, cih, &ciph); /* The embedded headers contain source and dest in reverse order */ cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1); - if (!cp) { - /* The packet could also belong to a local client */ - cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1); - if (cp) { - ipv6_addr_copy(&snet.in6, &iph->saddr); - return handle_response_icmp(AF_INET6, skb, &snet, - cih->nexthdr, - cp, pp, offset, - sizeof(struct ipv6hdr)); - } + if (!cp) return NF_ACCEPT; - } 
verdict = NF_DROP; -- cgit v1.1 From 41ac51eeda58a85b8a06d748cce7035cc77deebd Mon Sep 17 00:00:00 2001 From: Patrick Schaaf Date: Fri, 11 Feb 2011 14:01:12 +0100 Subject: ipvs: make "no destination available" message more informative When IP_VS schedulers do not find a destination, they output a terse "WLC: no destination available" message through kernel syslog, which I can only make sense of because syslog puts them in a logfile together with keepalived checker results. This patch makes the output a bit more informative, by telling you which virtual service failed to find a destination. Example output: kernel: [1539214.552233] IPVS: wlc: TCP 192.168.8.30:22 - no destination available kernel: [1539299.674418] IPVS: wlc: FWM 22 0x00000016 - no destination available I have tested the code for IPv4 and FWM services, as you can see from the example; I do not have an IPv6 setup to test the third code path with. To avoid code duplication, I put a new function ip_vs_scheduler_err() into ip_vs_sched.c, and use that from the schedulers instead of calling IP_VS_ERR_RL directly.
Signed-off-by: Patrick Schaaf Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_lblc.c | 2 +- net/netfilter/ipvs/ip_vs_lblcr.c | 2 +- net/netfilter/ipvs/ip_vs_lc.c | 2 +- net/netfilter/ipvs/ip_vs_nq.c | 2 +- net/netfilter/ipvs/ip_vs_rr.c | 2 +- net/netfilter/ipvs/ip_vs_sched.c | 25 +++++++++++++++++++++++++ net/netfilter/ipvs/ip_vs_sed.c | 2 +- net/netfilter/ipvs/ip_vs_sh.c | 2 +- net/netfilter/ipvs/ip_vs_wlc.c | 2 +- net/netfilter/ipvs/ip_vs_wrr.c | 14 ++++++++------ 10 files changed, 41 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 00b5ffa..4a9c8cd 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -510,7 +510,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) /* No cache entry or it is invalid, time to schedule */ dest = __ip_vs_lblc_schedule(svc); if (!dest) { - IP_VS_ERR_RL("LBLC: no destination available\n"); + ip_vs_scheduler_err(svc, "no destination available"); return NULL; } diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index bfa25f1..bd329b1 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -692,7 +692,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) /* The cache entry is invalid, time to schedule */ dest = __ip_vs_lblcr_schedule(svc); if (!dest) { - IP_VS_ERR_RL("LBLCR: no destination available\n"); + ip_vs_scheduler_err(svc, "no destination available"); read_unlock(&svc->sched_lock); return NULL; } diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c index 4f69db1..6063800 100644 --- a/net/netfilter/ipvs/ip_vs_lc.c +++ b/net/netfilter/ipvs/ip_vs_lc.c @@ -70,7 +70,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } if (!least) - IP_VS_ERR_RL("LC: no destination available\n"); + ip_vs_scheduler_err(svc, "no destination available"); else 
IP_VS_DBG_BUF(6, "LC: server %s:%u activeconns %d " "inactconns %d\n", diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index c413e18..984d9c1 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -99,7 +99,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } if (!least) { - IP_VS_ERR_RL("NQ: no destination available\n"); + ip_vs_scheduler_err(svc, "no destination available"); return NULL; } diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index e210f37..c49b388 100644 --- a/net/netfilter/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c @@ -72,7 +72,7 @@ ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) q = q->next; } while (q != p); write_unlock(&svc->sched_lock); - IP_VS_ERR_RL("RR: no destination available\n"); + ip_vs_scheduler_err(svc, "no destination available"); return NULL; out: diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index 076ebe0..08dbdd5 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -29,6 +29,7 @@ #include +EXPORT_SYMBOL(ip_vs_scheduler_err); /* * IPVS scheduler list */ @@ -146,6 +147,30 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler) module_put(scheduler->module); } +/* + * Common error output helper for schedulers + */ + +void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg) +{ + if (svc->fwmark) { + IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n", + svc->scheduler->name, svc->fwmark, + svc->fwmark, msg); +#ifdef CONFIG_IP_VS_IPV6 + } else if (svc->af == AF_INET6) { + IP_VS_ERR_RL("%s: %s [%pI6]:%d - %s\n", + svc->scheduler->name, + ip_vs_proto_name(svc->protocol), + &svc->addr.in6, ntohs(svc->port), msg); +#endif + } else { + IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n", + svc->scheduler->name, + ip_vs_proto_name(svc->protocol), + &svc->addr.ip, ntohs(svc->port), msg); + } +} /* * Register a scheduler in the scheduler list diff 
--git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index 1ab75a9..89ead24 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -87,7 +87,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) goto nextstage; } } - IP_VS_ERR_RL("SED: no destination available\n"); + ip_vs_scheduler_err(svc, "no destination available"); return NULL; /* diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index e6cc174..b5e2556 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -223,7 +223,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || atomic_read(&dest->weight) <= 0 || is_overloaded(dest)) { - IP_VS_ERR_RL("SH: no destination available\n"); + ip_vs_scheduler_err(svc, "no destination available"); return NULL; } diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index bbddfdb..fdf0f58 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -75,7 +75,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) goto nextstage; } } - IP_VS_ERR_RL("WLC: no destination available\n"); + ip_vs_scheduler_err(svc, "no destination available"); return NULL; /* diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index 30db633..1ef41f5 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -147,8 +147,9 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) if (mark->cl == mark->cl->next) { /* no dest entry */ - IP_VS_ERR_RL("WRR: no destination available: " - "no destinations present\n"); + ip_vs_scheduler_err(svc, + "no destination available: " + "no destinations present"); dest = NULL; goto out; } @@ -162,8 +163,8 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) */ if (mark->cw == 0) { mark->cl = &svc->destinations; - 
IP_VS_ERR_RL("WRR: no destination " - "available\n"); + ip_vs_scheduler_err(svc, + "no destination available"); dest = NULL; goto out; } @@ -185,8 +186,9 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) /* back to the start, and no dest is found. It is only possible when all dests are OVERLOADED */ dest = NULL; - IP_VS_ERR_RL("WRR: no destination available: " - "all destinations are overloaded\n"); + ip_vs_scheduler_err(svc, + "no destination available: " + "all destinations are overloaded"); goto out; } } -- cgit v1.1 From fcd89c09a59a054fb986861e0862aa2fff7d7c40 Mon Sep 17 00:00:00 2001 From: Ville Tervo Date: Thu, 10 Feb 2011 22:38:47 -0300 Subject: Bluetooth: Add LE connect support Bluetooth V4.0 adds support for Low Energy (LE) connections. Specification introduces new set of hci commands to control LE connection. This patch adds logic to create, cancel and disconnect LE connections. Signed-off-by: Ville Tervo Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/hci_conn.c | 51 ++++++++++++++++++++++++-- net/bluetooth/hci_event.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 141 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 42dc39f..d0c470c 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -45,6 +45,32 @@ #include #include +static void hci_le_connect(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + struct hci_cp_le_create_conn cp; + + conn->state = BT_CONNECT; + conn->out = 1; + + memset(&cp, 0, sizeof(cp)); + cp.scan_interval = cpu_to_le16(0x0004); + cp.scan_window = cpu_to_le16(0x0004); + bacpy(&cp.peer_addr, &conn->dst); + cp.conn_interval_min = cpu_to_le16(0x0008); + cp.conn_interval_max = cpu_to_le16(0x0100); + cp.supervision_timeout = cpu_to_le16(0x0064); + cp.min_ce_len = cpu_to_le16(0x0001); + cp.max_ce_len = cpu_to_le16(0x0001); + + hci_send_cmd(hdev, HCI_OP_LE_CREATE_CONN, sizeof(cp), &cp); +} + +static void hci_le_connect_cancel(struct hci_conn *conn) +{ + hci_send_cmd(conn->hdev, HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL); +} + void hci_acl_connect(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; @@ -193,8 +219,12 @@ static void hci_conn_timeout(unsigned long arg) switch (conn->state) { case BT_CONNECT: case BT_CONNECT2: - if (conn->type == ACL_LINK && conn->out) - hci_acl_connect_cancel(conn); + if (conn->out) { + if (conn->type == ACL_LINK) + hci_acl_connect_cancel(conn); + else if (conn->type == LE_LINK) + hci_le_connect_cancel(conn); + } break; case BT_CONFIG: case BT_CONNECTED: @@ -361,15 +391,30 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src) } EXPORT_SYMBOL(hci_get_route); -/* Create SCO or ACL connection. +/* Create SCO, ACL or LE connection. 
* Device _must_ be locked */ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 sec_level, __u8 auth_type) { struct hci_conn *acl; struct hci_conn *sco; + struct hci_conn *le; BT_DBG("%s dst %s", hdev->name, batostr(dst)); + if (type == LE_LINK) { + le = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst); + if (!le) + le = hci_conn_add(hdev, LE_LINK, dst); + if (!le) + return NULL; + if (le->state == BT_OPEN) + hci_le_connect(le); + + hci_conn_hold(le); + + return le; + } + acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); if (!acl) { acl = hci_conn_add(hdev, ACL_LINK, dst); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index cee46cb..47c6e93 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1107,6 +1107,43 @@ static void hci_cs_exit_sniff_mode(struct hci_dev *hdev, __u8 status) hci_dev_unlock(hdev); } +static void hci_cs_le_create_conn(struct hci_dev *hdev, __u8 status) +{ + struct hci_cp_le_create_conn *cp; + struct hci_conn *conn; + + BT_DBG("%s status 0x%x", hdev->name, status); + + cp = hci_sent_cmd_data(hdev, HCI_OP_LE_CREATE_CONN); + if (!cp) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->peer_addr); + + BT_DBG("%s bdaddr %s conn %p", hdev->name, batostr(&cp->peer_addr), + conn); + + if (status) { + if (conn && conn->state == BT_CONNECT) { + conn->state = BT_CLOSED; + hci_proto_connect_cfm(conn, status); + hci_conn_del(conn); + } + } else { + if (!conn) { + conn = hci_conn_add(hdev, LE_LINK, &cp->peer_addr); + if (conn) + conn->out = 1; + else + BT_ERR("No memory for new connection"); + } + } + + hci_dev_unlock(hdev); +} + static inline void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { __u8 status = *((__u8 *) skb->data); @@ -1738,6 +1775,10 @@ static inline void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb) mgmt_disconnect_failed(hdev->id); break; + case HCI_OP_LE_CREATE_CONN: + hci_cs_le_create_conn(hdev, 
ev->status); + break; + default: BT_DBG("%s opcode 0x%x", hdev->name, opcode); break; @@ -2321,6 +2362,54 @@ static inline void hci_remote_host_features_evt(struct hci_dev *hdev, struct sk_ hci_dev_unlock(hdev); } +static inline void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_le_conn_complete *ev = (void *) skb->data; + struct hci_conn *conn; + + BT_DBG("%s status %d", hdev->name, ev->status); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &ev->bdaddr); + if (!conn) + goto unlock; + + if (ev->status) { + hci_proto_connect_cfm(conn, ev->status); + conn->state = BT_CLOSED; + hci_conn_del(conn); + goto unlock; + } + + conn->handle = __le16_to_cpu(ev->handle); + conn->state = BT_CONNECTED; + + hci_conn_hold_device(conn); + hci_conn_add_sysfs(conn); + + hci_proto_connect_cfm(conn, ev->status); + +unlock: + hci_dev_unlock(hdev); +} + +static inline void hci_le_meta_evt(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_ev_le_meta *le_ev = (void *) skb->data; + + skb_pull(skb, sizeof(*le_ev)); + + switch (le_ev->subevent) { + case HCI_EV_LE_CONN_COMPLETE: + hci_le_conn_complete_evt(hdev, skb); + break; + + default: + break; + } +} + void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_event_hdr *hdr = (void *) skb->data; @@ -2461,6 +2550,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_remote_host_features_evt(hdev, skb); break; + case HCI_EV_LE_META: + hci_le_meta_evt(hdev, skb); + break; + default: BT_DBG("%s event 0x%x", hdev->name, event); break; -- cgit v1.1 From 6ed58ec520ad2b2fe3f955c8a5fd0eecafccebdf Mon Sep 17 00:00:00 2001 From: Ville Tervo Date: Thu, 10 Feb 2011 22:38:48 -0300 Subject: Bluetooth: Use LE buffers for LE traffic Bluetooth chips may have separate buffers for LE traffic. This patch add support to use LE buffers provided by the chip. Signed-off-by: Ville Tervo Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/hci_conn.c | 5 ++++ net/bluetooth/hci_core.c | 74 ++++++++++++++++++++++++++++++++++++++++++++--- net/bluetooth/hci_event.c | 33 +++++++++++++++++++++ 3 files changed, 108 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index d0c470c..aecd78e 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -326,6 +326,11 @@ int hci_conn_del(struct hci_conn *conn) /* Unacked frames */ hdev->acl_cnt += conn->sent; + } else if (conn->type == LE_LINK) { + if (hdev->le_pkts) + hdev->le_cnt += conn->sent; + else + hdev->acl_cnt += conn->sent; } else { struct hci_conn *acl = conn->link; if (acl) { diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 2f00322..9296053 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -263,6 +263,14 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) hci_send_cmd(hdev, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp); } +static void hci_le_init_req(struct hci_dev *hdev, unsigned long opt) +{ + BT_DBG("%s", hdev->name); + + /* Read LE buffer size */ + hci_send_cmd(hdev, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL); +} + static void hci_scan_req(struct hci_dev *hdev, unsigned long opt) { __u8 scan = opt; @@ -529,6 +537,10 @@ int hci_dev_open(__u16 dev) ret = __hci_request(hdev, hci_init_req, 0, msecs_to_jiffies(HCI_INIT_TIMEOUT)); + if (lmp_le_capable(hdev)) + ret = __hci_request(hdev, hci_le_init_req, 0, + msecs_to_jiffies(HCI_INIT_TIMEOUT)); + clear_bit(HCI_INIT, &hdev->flags); } @@ -671,7 +683,7 @@ int hci_dev_reset(__u16 dev) hdev->flush(hdev); atomic_set(&hdev->cmd_cnt, 1); - hdev->acl_cnt = 0; hdev->sco_cnt = 0; + hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0; if (!test_bit(HCI_RAW, &hdev->flags)) ret = __hci_request(hdev, hci_reset_req, 0, @@ -1672,8 +1684,25 @@ static inline struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, int } if (conn) { - int cnt = (type == 
ACL_LINK ? hdev->acl_cnt : hdev->sco_cnt); - int q = cnt / num; + int cnt, q; + + switch (conn->type) { + case ACL_LINK: + cnt = hdev->acl_cnt; + break; + case SCO_LINK: + case ESCO_LINK: + cnt = hdev->sco_cnt; + break; + case LE_LINK: + cnt = hdev->le_mtu ? hdev->le_cnt : hdev->acl_cnt; + break; + default: + cnt = 0; + BT_ERR("Unknown link type"); + } + + q = cnt / num; *quote = q ? q : 1; } else *quote = 0; @@ -1772,6 +1801,40 @@ static inline void hci_sched_esco(struct hci_dev *hdev) } } +static inline void hci_sched_le(struct hci_dev *hdev) +{ + struct hci_conn *conn; + struct sk_buff *skb; + int quote, cnt; + + BT_DBG("%s", hdev->name); + + if (!test_bit(HCI_RAW, &hdev->flags)) { + /* LE tx timeout must be longer than maximum + * link supervision timeout (40.9 seconds) */ + if (!hdev->le_cnt && + time_after(jiffies, hdev->le_last_tx + HZ * 45)) + hci_acl_tx_to(hdev); + } + + cnt = hdev->le_pkts ? hdev->le_cnt : hdev->acl_cnt; + while (cnt && (conn = hci_low_sent(hdev, LE_LINK, &quote))) { + while (quote-- && (skb = skb_dequeue(&conn->data_q))) { + BT_DBG("skb %p len %d", skb, skb->len); + + hci_send_frame(skb); + hdev->le_last_tx = jiffies; + + cnt--; + conn->sent++; + } + } + if (hdev->le_pkts) + hdev->le_cnt = cnt; + else + hdev->acl_cnt = cnt; +} + static void hci_tx_task(unsigned long arg) { struct hci_dev *hdev = (struct hci_dev *) arg; @@ -1779,7 +1842,8 @@ static void hci_tx_task(unsigned long arg) read_lock(&hci_task_lock); - BT_DBG("%s acl %d sco %d", hdev->name, hdev->acl_cnt, hdev->sco_cnt); + BT_DBG("%s acl %d sco %d le %d", hdev->name, hdev->acl_cnt, + hdev->sco_cnt, hdev->le_cnt); /* Schedule queues and send stuff to HCI driver */ @@ -1789,6 +1853,8 @@ static void hci_tx_task(unsigned long arg) hci_sched_esco(hdev); + hci_sched_le(hdev); + /* Send next queued raw (unknown type) packet */ while ((skb = skb_dequeue(&hdev->raw_q))) hci_send_frame(skb); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 47c6e93..3155ad5 100644 ---
a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -776,6 +776,25 @@ static void hci_cc_pin_code_neg_reply(struct hci_dev *hdev, struct sk_buff *skb) mgmt_pin_code_neg_reply_complete(hdev->id, &rp->bdaddr, rp->status); } +static void hci_cc_le_read_buffer_size(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_le_read_buffer_size *rp = (void *) skb->data; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (rp->status) + return; + + hdev->le_mtu = __le16_to_cpu(rp->le_mtu); + hdev->le_pkts = rp->le_max_pkt; + + hdev->le_cnt = hdev->le_pkts; + + BT_DBG("%s le mtu %d:%d", hdev->name, hdev->le_mtu, hdev->le_pkts); + + hci_req_complete(hdev, HCI_OP_LE_READ_BUFFER_SIZE, rp->status); +} static inline void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) { @@ -1704,6 +1723,10 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk hci_cc_pin_code_neg_reply(hdev, skb); break; + case HCI_OP_LE_READ_BUFFER_SIZE: + hci_cc_le_read_buffer_size(hdev, skb); + break; + default: BT_DBG("%s opcode 0x%x", hdev->name, opcode); break; @@ -1849,6 +1872,16 @@ static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *s hdev->acl_cnt += count; if (hdev->acl_cnt > hdev->acl_pkts) hdev->acl_cnt = hdev->acl_pkts; + } else if (conn->type == LE_LINK) { + if (hdev->le_pkts) { + hdev->le_cnt += count; + if (hdev->le_cnt > hdev->le_pkts) + hdev->le_cnt = hdev->le_pkts; + } else { + hdev->acl_cnt += count; + if (hdev->acl_cnt > hdev->acl_pkts) + hdev->acl_cnt = hdev->acl_pkts; + } } else { hdev->sco_cnt += count; if (hdev->sco_cnt > hdev->sco_pkts) -- cgit v1.1 From acd7d3708555b3da7522e23c183cc21efc785f72 Mon Sep 17 00:00:00 2001 From: Ville Tervo Date: Thu, 10 Feb 2011 22:38:49 -0300 Subject: Bluetooth: Add LE connection support to L2CAP Add basic LE connection support to L2CAP. 
LE connection can be created by specifying cid in struct sockaddr_l2 Signed-off-by: Ville Tervo Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_core.c | 23 +++++++++++++++++++---- net/bluetooth/l2cap_sock.c | 7 ++++--- 2 files changed, 23 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index a72d6e4..123c1bf 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -593,6 +593,12 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { bh_lock_sock(sk); + if (conn->hcon->type == LE_LINK) { + l2cap_sock_clear_timer(sk); + sk->sk_state = BT_CONNECTED; + sk->sk_state_change(sk); + } + if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM) { l2cap_sock_clear_timer(sk); @@ -651,7 +657,11 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) BT_DBG("hcon %p conn %p", hcon, conn); - conn->mtu = hcon->hdev->acl_mtu; + if (hcon->hdev->le_mtu && hcon->type == LE_LINK) + conn->mtu = hcon->hdev->le_mtu; + else + conn->mtu = hcon->hdev->acl_mtu; + conn->src = &hcon->hdev->bdaddr; conn->dst = &hcon->dst; @@ -758,8 +768,13 @@ int l2cap_do_connect(struct sock *sk) auth_type = l2cap_get_auth_type(sk); - hcon = hci_connect(hdev, ACL_LINK, dst, + if (l2cap_pi(sk)->dcid == L2CAP_CID_LE_DATA) + hcon = hci_connect(hdev, LE_LINK, dst, l2cap_pi(sk)->sec_level, auth_type); + else + hcon = hci_connect(hdev, ACL_LINK, dst, + l2cap_pi(sk)->sec_level, auth_type); + if (!hcon) goto done; @@ -3520,7 +3535,7 @@ static int l2cap_connect_cfm(struct hci_conn *hcon, u8 status) BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status); - if (hcon->type != ACL_LINK) + if (!(hcon->type == ACL_LINK || hcon->type == LE_LINK)) return -EINVAL; if (!status) { @@ -3549,7 +3564,7 @@ static int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) { BT_DBG("hcon %p reason %d", hcon, reason); 
- if (hcon->type != ACL_LINK) + if (!(hcon->type == ACL_LINK || hcon->type == LE_LINK)) return -EINVAL; l2cap_conn_del(hcon, bt_err(reason)); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 21f5385..f45d361 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -168,13 +168,13 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al len = min_t(unsigned int, sizeof(la), alen); memcpy(&la, addr, len); - if (la.l2_cid) + if (la.l2_cid && la.l2_psm) return -EINVAL; lock_sock(sk); if ((sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM) - && !la.l2_psm) { + && !(la.l2_psm || la.l2_cid)) { err = -EINVAL; goto done; } @@ -216,7 +216,7 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al /* PSM must be odd and lsb of upper byte must be 0 */ if ((__le16_to_cpu(la.l2_psm) & 0x0101) != 0x0001 && - sk->sk_type != SOCK_RAW) { + sk->sk_type != SOCK_RAW && !la.l2_cid) { err = -EINVAL; goto done; } @@ -224,6 +224,7 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al /* Set destination address and psm */ bacpy(&bt_sk(sk)->dst, &la.l2_bdaddr); l2cap_pi(sk)->psm = la.l2_psm; + l2cap_pi(sk)->dcid = la.l2_cid; err = l2cap_do_connect(sk); if (err) -- cgit v1.1 From b62f328b8f20abe97cdbaaf44c6e4f5e7a610f18 Mon Sep 17 00:00:00 2001 From: Ville Tervo Date: Thu, 10 Feb 2011 22:38:50 -0300 Subject: Bluetooth: Add server socket support for LE connection Add support for LE server sockets. Signed-off-by: Ville Tervo Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/hci_event.c | 10 ++++- net/bluetooth/l2cap_core.c | 94 ++++++++++++++++++++++++++++++++++++++++++++-- net/bluetooth/l2cap_sock.c | 7 +++- 3 files changed, 104 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 3155ad5..74f04a2 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2405,8 +2405,14 @@ static inline void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &ev->bdaddr); - if (!conn) - goto unlock; + if (!conn) { + conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr); + if (!conn) { + BT_ERR("No memory for new connection"); + hci_dev_unlock(hdev); + return; + } + } if (ev->status) { hci_proto_connect_cfm(conn, ev->status); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 123c1bf..3079175 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -181,8 +181,16 @@ static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct so l2cap_pi(sk)->conn = conn; if (sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM) { - /* Alloc CID for connection-oriented socket */ - l2cap_pi(sk)->scid = l2cap_alloc_cid(l); + if (conn->hcon->type == LE_LINK) { + /* LE connection */ + l2cap_pi(sk)->omtu = L2CAP_LE_DEFAULT_MTU; + l2cap_pi(sk)->scid = L2CAP_CID_LE_DATA; + l2cap_pi(sk)->dcid = L2CAP_CID_LE_DATA; + } else { + /* Alloc CID for connection-oriented socket */ + l2cap_pi(sk)->scid = l2cap_alloc_cid(l); + l2cap_pi(sk)->omtu = L2CAP_DEFAULT_MTU; + } } else if (sk->sk_type == SOCK_DGRAM) { /* Connectionless socket */ l2cap_pi(sk)->scid = L2CAP_CID_CONN_LESS; @@ -581,6 +589,82 @@ static void l2cap_conn_start(struct l2cap_conn *conn) } } +/* Find socket with cid and source bdaddr. + * Returns closest match, locked. 
+ */ +static struct sock *l2cap_get_sock_by_scid(int state, __le16 cid, bdaddr_t *src) +{ + struct sock *s, *sk = NULL, *sk1 = NULL; + struct hlist_node *node; + + read_lock(&l2cap_sk_list.lock); + + sk_for_each(sk, node, &l2cap_sk_list.head) { + if (state && sk->sk_state != state) + continue; + + if (l2cap_pi(sk)->scid == cid) { + /* Exact match. */ + if (!bacmp(&bt_sk(sk)->src, src)) + break; + + /* Closest match */ + if (!bacmp(&bt_sk(sk)->src, BDADDR_ANY)) + sk1 = sk; + } + } + s = node ? sk : sk1; + if (s) + bh_lock_sock(s); + read_unlock(&l2cap_sk_list.lock); + + return s; +} + +static void l2cap_le_conn_ready(struct l2cap_conn *conn) +{ + struct l2cap_chan_list *list = &conn->chan_list; + struct sock *parent, *uninitialized_var(sk); + + BT_DBG(""); + + /* Check if we have socket listening on cid */ + parent = l2cap_get_sock_by_scid(BT_LISTEN, L2CAP_CID_LE_DATA, + conn->src); + if (!parent) + return; + + /* Check for backlog size */ + if (sk_acceptq_is_full(parent)) { + BT_DBG("backlog full %d", parent->sk_ack_backlog); + goto clean; + } + + sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP, GFP_ATOMIC); + if (!sk) + goto clean; + + write_lock_bh(&list->lock); + + hci_conn_hold(conn->hcon); + + l2cap_sock_init(sk, parent); + bacpy(&bt_sk(sk)->src, conn->src); + bacpy(&bt_sk(sk)->dst, conn->dst); + + __l2cap_chan_add(conn, sk, parent); + + l2cap_sock_set_timer(sk, sk->sk_sndtimeo); + + sk->sk_state = BT_CONNECTED; + parent->sk_data_ready(parent, 0); + + write_unlock_bh(&list->lock); + +clean: + bh_unlock_sock(parent); +} + static void l2cap_conn_ready(struct l2cap_conn *conn) { struct l2cap_chan_list *l = &conn->chan_list; @@ -588,6 +672,9 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) BT_DBG("conn %p", conn); + if (!conn->hcon->out && conn->hcon->type == LE_LINK) + l2cap_le_conn_ready(conn); + read_lock(&l->lock); for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { @@ -670,7 +757,8 @@ static struct l2cap_conn *l2cap_conn_add(struct 
hci_conn *hcon, u8 status) spin_lock_init(&conn->lock); rwlock_init(&conn->chan_list.lock); - setup_timer(&conn->info_timer, l2cap_info_timeout, + if (hcon->type != LE_LINK) + setup_timer(&conn->info_timer, l2cap_info_timeout, (unsigned long) conn); conn->disc_reason = 0x13; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index f45d361..a8d2893 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -103,7 +103,7 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) len = min_t(unsigned int, sizeof(la), alen); memcpy(&la, addr, len); - if (la.l2_cid) + if (la.l2_cid && la.l2_psm) return -EINVAL; lock_sock(sk); @@ -145,6 +145,9 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) l2cap_pi(sk)->sec_level = BT_SECURITY_SDP; } + if (la.l2_cid) + l2cap_pi(sk)->scid = la.l2_cid; + write_unlock_bh(&l2cap_sk_list.lock); done: @@ -266,7 +269,7 @@ static int l2cap_sock_listen(struct socket *sock, int backlog) goto done; } - if (!l2cap_pi(sk)->psm) { + if (!l2cap_pi(sk)->psm && !l2cap_pi(sk)->dcid) { bdaddr_t *src = &bt_sk(sk)->src; u16 psm; -- cgit v1.1 From 5589fa9c2d2b8c134f44db36892ccc500aac3147 Mon Sep 17 00:00:00 2001 From: Ville Tervo Date: Thu, 10 Feb 2011 22:38:51 -0300 Subject: Bluetooth: Do not send disconn comand over LE links l2cap over LE links can be disconnected without sending disconnect command first. Signed-off-by: Ville Tervo Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/l2cap_sock.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index a8d2893..484e717 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -885,6 +885,8 @@ static void l2cap_sock_cleanup_listen(struct sock *parent) void __l2cap_sock_close(struct sock *sk, int reason) { + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + BT_DBG("sk %p state %d socket %p", sk, sk->sk_state, sk->sk_socket); switch (sk->sk_state) { @@ -894,10 +896,9 @@ void __l2cap_sock_close(struct sock *sk, int reason) case BT_CONNECTED: case BT_CONFIG: - if (sk->sk_type == SOCK_SEQPACKET || - sk->sk_type == SOCK_STREAM) { - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - + if ((sk->sk_type == SOCK_SEQPACKET || + sk->sk_type == SOCK_STREAM) && + conn->hcon->type == ACL_LINK) { l2cap_sock_set_timer(sk, sk->sk_sndtimeo); l2cap_send_disconn_req(conn, sk, reason); } else @@ -905,9 +906,9 @@ void __l2cap_sock_close(struct sock *sk, int reason) break; case BT_CONNECT2: - if (sk->sk_type == SOCK_SEQPACKET || - sk->sk_type == SOCK_STREAM) { - struct l2cap_conn *conn = l2cap_pi(sk)->conn; + if ((sk->sk_type == SOCK_SEQPACKET || + sk->sk_type == SOCK_STREAM) && + conn->hcon->type == ACL_LINK) { struct l2cap_conn_rsp rsp; __u16 result; -- cgit v1.1 From b92a62238ff2d3fb88cf0f6de454f3d1b4ae5d52 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Thu, 10 Feb 2011 22:38:52 -0300 Subject: Bluetooth: Fix initiated LE connections Fix LE connections not being marked as master. Signed-off-by: Vinicius Costa Gomes Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/hci_conn.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index aecd78e..efcd2b5 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -52,6 +52,7 @@ static void hci_le_connect(struct hci_conn *conn) conn->state = BT_CONNECT; conn->out = 1; + conn->link_mode |= HCI_LM_MASTER; memset(&cp, 0, sizeof(cp)); cp.scan_interval = cpu_to_le16(0x0004); -- cgit v1.1 From bae1f5d9464d231148301fcbf4e425a096a5b96d Mon Sep 17 00:00:00 2001 From: Ville Tervo Date: Thu, 10 Feb 2011 22:38:53 -0300 Subject: Bluetooth: Treat LE and ACL links separately on timeout Separate LE and ACL timeouts. Otherwise ACL connections on non LE hw will time out after 45 secs. Signed-off-by: Ville Tervo Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_core.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 9296053..173bebd 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1711,19 +1711,19 @@ static inline struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, int return conn; } -static inline void hci_acl_tx_to(struct hci_dev *hdev) +static inline void hci_link_tx_to(struct hci_dev *hdev, __u8 type) { struct hci_conn_hash *h = &hdev->conn_hash; struct list_head *p; struct hci_conn *c; - BT_ERR("%s ACL tx timeout", hdev->name); + BT_ERR("%s link tx timeout", hdev->name); /* Kill stalled connections */ list_for_each(p, &h->list) { c = list_entry(p, struct hci_conn, list); - if (c->type == ACL_LINK && c->sent) { - BT_ERR("%s killing stalled ACL connection %s", + if (c->type == type && c->sent) { + BT_ERR("%s killing stalled connection %s", hdev->name, batostr(&c->dst)); hci_acl_disconn(c, 0x13); } @@ -1742,7 +1742,7 @@ static inline void hci_sched_acl(struct hci_dev *hdev) /* ACL tx timeout must be longer than maximum *
link supervision timeout (40.9 seconds) */ if (!hdev->acl_cnt && time_after(jiffies, hdev->acl_last_tx + HZ * 45)) - hci_acl_tx_to(hdev); + hci_link_tx_to(hdev, ACL_LINK); } while (hdev->acl_cnt && (conn = hci_low_sent(hdev, ACL_LINK, &quote))) { @@ -1812,9 +1812,9 @@ static inline void hci_sched_le(struct hci_dev *hdev) if (!test_bit(HCI_RAW, &hdev->flags)) { /* LE tx timeout must be longer than maximum * link supervision timeout (40.9 seconds) */ - if (!hdev->le_cnt && + if (!hdev->le_cnt && hdev->le_pkts && time_after(jiffies, hdev->le_last_tx + HZ * 45)) - hci_acl_tx_to(hdev); + hci_link_tx_to(hdev, LE_LINK); } cnt = hdev->le_pkts ? hdev->le_cnt : hdev->acl_cnt; -- cgit v1.1 From 3300d9a930a79508032e3e03ac2bde3a22dd048d Mon Sep 17 00:00:00 2001 From: Claudio Takahasi Date: Fri, 11 Feb 2011 19:28:54 -0200 Subject: Bluetooth: Add LE signaling commands handling This patch splits the L2CAP command handling function in order to have a clear separation between the commands related to BR/EDR and LE. Commands and responses in the LE signaling channel are not being handled yet, command reject is sent to all received requests. Bluetooth Core Specification, Volume 3, Part A, section 4 defines the signaling packets formats and allowed commands/responses over the LE signaling channel. Signed-off-by: Claudio Takahasi Signed-off-by: Gustavo F.
Padovan --- net/bluetooth/l2cap_core.c | 142 ++++++++++++++++++++++++++++----------------- 1 file changed, 90 insertions(+), 52 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 3079175..ce781a4 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1428,7 +1428,11 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); lh->len = cpu_to_le16(L2CAP_CMD_HDR_SIZE + dlen); - lh->cid = cpu_to_le16(L2CAP_CID_SIGNALING); + + if (conn->hcon->type == LE_LINK) + lh->cid = cpu_to_le16(L2CAP_CID_LE_SIGNALING); + else + lh->cid = cpu_to_le16(L2CAP_CID_SIGNALING); cmd = (struct l2cap_cmd_hdr *) skb_put(skb, L2CAP_CMD_HDR_SIZE); cmd->code = code; @@ -2497,12 +2501,90 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cm return 0; } -static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) +static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) +{ + int err = 0; + + switch (cmd->code) { + case L2CAP_COMMAND_REJ: + l2cap_command_rej(conn, cmd, data); + break; + + case L2CAP_CONN_REQ: + err = l2cap_connect_req(conn, cmd, data); + break; + + case L2CAP_CONN_RSP: + err = l2cap_connect_rsp(conn, cmd, data); + break; + + case L2CAP_CONF_REQ: + err = l2cap_config_req(conn, cmd, cmd_len, data); + break; + + case L2CAP_CONF_RSP: + err = l2cap_config_rsp(conn, cmd, data); + break; + + case L2CAP_DISCONN_REQ: + err = l2cap_disconnect_req(conn, cmd, data); + break; + + case L2CAP_DISCONN_RSP: + err = l2cap_disconnect_rsp(conn, cmd, data); + break; + + case L2CAP_ECHO_REQ: + l2cap_send_cmd(conn, cmd->ident, L2CAP_ECHO_RSP, cmd_len, data); + break; + + case L2CAP_ECHO_RSP: + break; + + case L2CAP_INFO_REQ: + err = l2cap_information_req(conn, cmd, data); + break; + + case L2CAP_INFO_RSP: + err = l2cap_information_rsp(conn, cmd, data); + 
break; + + default: + BT_ERR("Unknown BR/EDR signaling command 0x%2.2x", cmd->code); + err = -EINVAL; + break; + } + + return err; +} + +static inline int l2cap_le_sig_cmd(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u8 *data) +{ + switch (cmd->code) { + case L2CAP_COMMAND_REJ: + return 0; + + case L2CAP_CONN_PARAM_UPDATE_REQ: + return -EINVAL; + + case L2CAP_CONN_PARAM_UPDATE_RSP: + return 0; + + default: + BT_ERR("Unknown LE signaling command 0x%2.2x", cmd->code); + return -EINVAL; + } +} + +static inline void l2cap_sig_channel(struct l2cap_conn *conn, + struct sk_buff *skb) { u8 *data = skb->data; int len = skb->len; struct l2cap_cmd_hdr cmd; - int err = 0; + int err; l2cap_raw_recv(conn, skb); @@ -2521,55 +2603,10 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *sk break; } - switch (cmd.code) { - case L2CAP_COMMAND_REJ: - l2cap_command_rej(conn, &cmd, data); - break; - - case L2CAP_CONN_REQ: - err = l2cap_connect_req(conn, &cmd, data); - break; - - case L2CAP_CONN_RSP: - err = l2cap_connect_rsp(conn, &cmd, data); - break; - - case L2CAP_CONF_REQ: - err = l2cap_config_req(conn, &cmd, cmd_len, data); - break; - - case L2CAP_CONF_RSP: - err = l2cap_config_rsp(conn, &cmd, data); - break; - - case L2CAP_DISCONN_REQ: - err = l2cap_disconnect_req(conn, &cmd, data); - break; - - case L2CAP_DISCONN_RSP: - err = l2cap_disconnect_rsp(conn, &cmd, data); - break; - - case L2CAP_ECHO_REQ: - l2cap_send_cmd(conn, cmd.ident, L2CAP_ECHO_RSP, cmd_len, data); - break; - - case L2CAP_ECHO_RSP: - break; - - case L2CAP_INFO_REQ: - err = l2cap_information_req(conn, &cmd, data); - break; - - case L2CAP_INFO_RSP: - err = l2cap_information_rsp(conn, &cmd, data); - break; - - default: - BT_ERR("Unknown signaling command 0x%2.2x", cmd.code); - err = -EINVAL; - break; - } + if (conn->hcon->type == LE_LINK) + err = l2cap_le_sig_cmd(conn, &cmd, data); + else + err = l2cap_bredr_sig_cmd(conn, &cmd, cmd_len, data); if (err) { struct l2cap_cmd_rej rej; 
@@ -3566,6 +3603,7 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) BT_DBG("len %d, cid 0x%4.4x", len, cid); switch (cid) { + case L2CAP_CID_LE_SIGNALING: case L2CAP_CID_SIGNALING: l2cap_sig_channel(conn, skb); break; -- cgit v1.1 From de73115a7d67e1b81dbde2285a7657f3e3867703 Mon Sep 17 00:00:00 2001 From: Claudio Takahasi Date: Fri, 11 Feb 2011 19:28:55 -0200 Subject: Bluetooth: Add connection parameter update response Implements L2CAP Connection Parameter Update Response defined in the Bluetooth Core Specification, Volume 3, Part A, section 4.21. Address the LE Connection Parameter Procedure initiated by the slave. Connection Interval Minimum and Maximum have the same range: 6 to 3200. Time = N * 1.25ms. Minimum shall be less or equal to Maximum. The Slave Latency field shall have a value in the range of 0 to ((connSupervisionTimeout / connIntervalMax) - 1). Latency field shall be less than 500. connSupervisionTimeout = Timeout Multiplier * 10 ms. Multiplier field shall have a value in the range of 10 to 3200. Signed-off-by: Claudio Takahasi Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/l2cap_core.c | 59 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index ce781a4..e0e7b82 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -2501,6 +2501,63 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cm return 0; } +static int inline l2cap_check_conn_param(u16 min, u16 max, u16 latency, + u16 to_multiplier) +{ + u16 max_latency; + + if (min > max || min < 6 || max > 3200) + return -EINVAL; + + if (to_multiplier < 10 || to_multiplier > 3200) + return -EINVAL; + + if (max >= to_multiplier * 8) + return -EINVAL; + + max_latency = (to_multiplier * 8 / max) - 1; + if (latency > 499 || latency > max_latency) + return -EINVAL; + + return 0; +} + +static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, + struct l2cap_cmd_hdr *cmd, u8 *data) +{ + struct hci_conn *hcon = conn->hcon; + struct l2cap_conn_param_update_req *req; + struct l2cap_conn_param_update_rsp rsp; + u16 min, max, latency, to_multiplier, cmd_len; + + if (!(hcon->link_mode & HCI_LM_MASTER)) + return -EINVAL; + + cmd_len = __le16_to_cpu(cmd->len); + if (cmd_len != sizeof(struct l2cap_conn_param_update_req)) + return -EPROTO; + + req = (struct l2cap_conn_param_update_req *) data; + min = __le16_to_cpu(req->min); + max = __le16_to_cpu(req->max); + latency = __le16_to_cpu(req->latency); + to_multiplier = __le16_to_cpu(req->to_multiplier); + + BT_DBG("min 0x%4.4x max 0x%4.4x latency: 0x%4.4x Timeout: 0x%4.4x", + min, max, latency, to_multiplier); + + memset(&rsp, 0, sizeof(rsp)); + if (l2cap_check_conn_param(min, max, latency, to_multiplier)) + rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED); + else + rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_ACCEPTED); + + l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_PARAM_UPDATE_RSP, + sizeof(rsp), &rsp); + + return 0; +} + static inline 
int l2cap_bredr_sig_cmd(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u16 cmd_len, u8 *data) { @@ -2567,7 +2624,7 @@ static inline int l2cap_le_sig_cmd(struct l2cap_conn *conn, return 0; case L2CAP_CONN_PARAM_UPDATE_REQ: - return -EINVAL; + return l2cap_conn_param_update_req(conn, cmd, data); case L2CAP_CONN_PARAM_UPDATE_RSP: return 0; -- cgit v1.1 From c6f3c5f7f2938d2809bcc15889e9aa212038a554 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Tue, 15 Feb 2011 20:22:03 -0300 Subject: Bluetooth: Fix crash when ioctl(HCIUARTSETPROTO) fails If the failure happens, the HCI del_timer may time out after the hci dev unregister. This leads to a kernel crash. Reported-by: Vinicius Costa Gomes Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 173bebd..c01415b 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1199,6 +1199,8 @@ int hci_unregister_dev(struct hci_dev *hdev) hci_unregister_sysfs(hdev); + hci_del_off_timer(hdev); + destroy_workqueue(hdev->workqueue); hci_dev_lock_bh(hdev); -- cgit v1.1 From 6bd32326cdaa9b14794416150c88e4832fb7e592 Mon Sep 17 00:00:00 2001 From: Ville Tervo Date: Wed, 16 Feb 2011 16:32:41 +0200 Subject: Bluetooth: Use proper timer for hci command timeout Use proper timer instead of hci command flow control to time out failed hci commands. Otherwise stack ends up sending commands when flow control is used to block new commands. 2010-09-01 18:29:41.592132 < HCI Command: Remote Name Request (0x01|0x0019) plen 10 bdaddr 00:16:CF:E1:C7:D7 mode 2 clkoffset 0x0000 2010-09-01 18:29:41.592681 > HCI Event: Command Status (0x0f) plen 4 Remote Name Request (0x01|0x0019) status 0x00 ncmd 0 2010-09-01 18:29:51.022033 < HCI Command: Remote Name Request Cancel (0x01|0x001a) plen 6 bdaddr 00:16:CF:E1:C7:D7 Signed-off-by: Ville Tervo Signed-off-by: Gustavo F.
Padovan --- net/bluetooth/hci_core.c | 22 ++++++++++++++++------ net/bluetooth/hci_event.c | 6 ++++++ 2 files changed, 22 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index c01415b..702d565 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -623,6 +624,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) /* Drop last sent command */ if (hdev->sent_cmd) { + del_timer_sync(&hdev->cmd_timer); kfree_skb(hdev->sent_cmd); hdev->sent_cmd = NULL; } @@ -1066,6 +1068,16 @@ int hci_remove_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr) return 0; } +/* HCI command timer function */ +static void hci_cmd_timer(unsigned long arg) +{ + struct hci_dev *hdev = (void *) arg; + + BT_ERR("%s command tx timeout", hdev->name); + atomic_set(&hdev->cmd_cnt, 1); + tasklet_schedule(&hdev->cmd_task); +} + /* Register HCI device */ int hci_register_dev(struct hci_dev *hdev) { @@ -1112,6 +1124,8 @@ int hci_register_dev(struct hci_dev *hdev) skb_queue_head_init(&hdev->cmd_q); skb_queue_head_init(&hdev->raw_q); + setup_timer(&hdev->cmd_timer, hci_cmd_timer, (unsigned long) hdev); + for (i = 0; i < NUM_REASSEMBLY; i++) hdev->reassembly[i] = NULL; @@ -2004,11 +2018,6 @@ static void hci_cmd_task(unsigned long arg) BT_DBG("%s cmd %d", hdev->name, atomic_read(&hdev->cmd_cnt)); - if (!atomic_read(&hdev->cmd_cnt) && time_after(jiffies, hdev->cmd_last_tx + HZ)) { - BT_ERR("%s command tx timeout", hdev->name); - atomic_set(&hdev->cmd_cnt, 1); - } - /* Send queued commands */ if (atomic_read(&hdev->cmd_cnt)) { skb = skb_dequeue(&hdev->cmd_q); @@ -2021,7 +2030,8 @@ static void hci_cmd_task(unsigned long arg) if (hdev->sent_cmd) { atomic_dec(&hdev->cmd_cnt); hci_send_frame(skb); - hdev->cmd_last_tx = jiffies; + mod_timer(&hdev->cmd_timer, + jiffies + msecs_to_jiffies(HCI_CMD_TIMEOUT)); } else { skb_queue_head(&hdev->cmd_q, skb); 
tasklet_schedule(&hdev->cmd_task); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 74f04a2..09cb29e 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1732,6 +1732,9 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk break; } + if (ev->opcode != HCI_OP_NOP) + del_timer(&hdev->cmd_timer); + if (ev->ncmd) { atomic_set(&hdev->cmd_cnt, 1); if (!skb_queue_empty(&hdev->cmd_q)) @@ -1807,6 +1810,9 @@ static inline void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb) break; } + if (ev->opcode != HCI_OP_NOP) + del_timer(&hdev->cmd_timer); + if (ev->ncmd) { atomic_set(&hdev->cmd_cnt, 1); if (!skb_queue_empty(&hdev->cmd_q)) -- cgit v1.1 From 2ce603ebe1f1420c7c5b013638ec29b4fc975180 Mon Sep 17 00:00:00 2001 From: Claudio Takahasi Date: Wed, 16 Feb 2011 20:44:53 -0200 Subject: Bluetooth: Send LE Connection Update Command If the new connection update parameter are accepted, the LE master host sends the LE Connection Update Command to its controller informing the new requested parameters. Signed-off-by: Claudio Takahasi Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/hci_conn.c | 20 ++++++++++++++++++++ net/bluetooth/l2cap_core.c | 8 +++++++- 2 files changed, 27 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index efcd2b5..a050a69 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -183,6 +183,26 @@ void hci_setup_sync(struct hci_conn *conn, __u16 handle) hci_send_cmd(hdev, HCI_OP_SETUP_SYNC_CONN, sizeof(cp), &cp); } +void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, + u16 latency, u16 to_multiplier) +{ + struct hci_cp_le_conn_update cp; + struct hci_dev *hdev = conn->hdev; + + memset(&cp, 0, sizeof(cp)); + + cp.handle = cpu_to_le16(conn->handle); + cp.conn_interval_min = cpu_to_le16(min); + cp.conn_interval_max = cpu_to_le16(max); + cp.conn_latency = cpu_to_le16(latency); + cp.supervision_timeout = cpu_to_le16(to_multiplier); + cp.min_ce_len = cpu_to_le16(0x0001); + cp.max_ce_len = cpu_to_le16(0x0001); + + hci_send_cmd(hdev, HCI_OP_LE_CONN_UPDATE, sizeof(cp), &cp); +} +EXPORT_SYMBOL(hci_le_conn_update); + /* Device _must_ be locked */ void hci_sco_setup(struct hci_conn *conn, __u8 status) { diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index e0e7b82..bd31367 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -2529,6 +2529,7 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, struct l2cap_conn_param_update_req *req; struct l2cap_conn_param_update_rsp rsp; u16 min, max, latency, to_multiplier, cmd_len; + int err; if (!(hcon->link_mode & HCI_LM_MASTER)) return -EINVAL; @@ -2547,7 +2548,9 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, min, max, latency, to_multiplier); memset(&rsp, 0, sizeof(rsp)); - if (l2cap_check_conn_param(min, max, latency, to_multiplier)) + + err = l2cap_check_conn_param(min, max, latency, to_multiplier); + if (err) rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED); else rsp.result = 
cpu_to_le16(L2CAP_CONN_PARAM_ACCEPTED); @@ -2555,6 +2558,9 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_PARAM_UPDATE_RSP, sizeof(rsp), &rsp); + if (!err) + hci_le_conn_update(hcon, min, max, latency, to_multiplier); + return 0; } -- cgit v1.1 From 7235975383785276bdcfc13dd73cc87739cd25d0 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Thu, 17 Feb 2011 14:16:32 +0100 Subject: Bluetooth: Use #include <linux/uaccess.h> instead of <asm/uaccess.h> As warned by checkpatch.pl, use #include <linux/uaccess.h> instead of <asm/uaccess.h>. Signed-off-by: Szymon Janc Signed-off-by: Gustavo F. Padovan --- net/bluetooth/mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index b2bda830..f5ef7a3 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -22,7 +22,7 @@ /* Bluetooth HCI Management interface */ -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include #include -- cgit v1.1 From 0786f8b7777721c0d3d5dd691692eaee696e94e1 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Thu, 17 Feb 2011 14:16:33 +0100 Subject: Bluetooth: Clean up hci_sniff_subrate_evt function Signed-off-by: Szymon Janc Signed-off-by: Gustavo F.
Padovan --- net/bluetooth/hci_event.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 09cb29e..1741936 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2242,17 +2242,8 @@ static inline void hci_sync_conn_changed_evt(struct hci_dev *hdev, struct sk_buf static inline void hci_sniff_subrate_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_sniff_subrate *ev = (void *) skb->data; - struct hci_conn *conn; BT_DBG("%s status %d", hdev->name, ev->status); - - hci_dev_lock(hdev); - - conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); - if (conn) { - } - - hci_dev_unlock(hdev); } static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct sk_buff *skb) -- cgit v1.1 From 01df8c31d152493ddc58a0bd1719eac6759add87 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Thu, 17 Feb 2011 16:46:47 +0100 Subject: Bluetooth: Fix some code style issues in hci_core.c Signed-off-by: Szymon Janc Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_core.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 702d565..b372fb8 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -124,7 +124,7 @@ static void hci_req_cancel(struct hci_dev *hdev, int err) /* Execute request and wait for completion. 
*/ static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev, unsigned long opt), - unsigned long opt, __u32 timeout) + unsigned long opt, __u32 timeout) { DECLARE_WAITQUEUE(wait, current); int err = 0; @@ -166,7 +166,7 @@ static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev, } static inline int hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev, unsigned long opt), - unsigned long opt, __u32 timeout) + unsigned long opt, __u32 timeout) { int ret; @@ -465,7 +465,7 @@ int hci_inquiry(void __user *arg) /* cache_dump can't sleep. Therefore we allocate temp buffer and then * copy it to the user space. */ - buf = kmalloc(sizeof(struct inquiry_info) *max_rsp, GFP_KERNEL); + buf = kmalloc(sizeof(struct inquiry_info) * max_rsp, GFP_KERNEL); if (!buf) { err = -ENOMEM; goto done; @@ -534,7 +534,6 @@ int hci_dev_open(__u16 dev) set_bit(HCI_INIT, &hdev->flags); hdev->init_last_cmd = 0; - //__hci_request(hdev, hci_reset_req, 0, HZ); ret = __hci_request(hdev, hci_init_req, 0, msecs_to_jiffies(HCI_INIT_TIMEOUT)); -- cgit v1.1 From 138d22ef14bf00e44de7885cd03f0c3b6ac168f5 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Thu, 17 Feb 2011 16:44:23 +0100 Subject: Bluetooth: Fix some code style issues in hci_event.c Signed-off-by: Szymon Janc Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/hci_event.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 1741936..98b5764 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -938,7 +938,7 @@ static void hci_cs_set_conn_encrypt(struct hci_dev *hdev, __u8 status) } static int hci_outgoing_auth_needed(struct hci_dev *hdev, - struct hci_conn *conn) + struct hci_conn *conn) { if (conn->state != BT_CONFIG || !conn->out) return 0; @@ -1293,7 +1293,8 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type); - if ((mask & HCI_LM_ACCEPT) && !hci_blacklist_lookup(hdev, &ev->bdaddr)) { + if ((mask & HCI_LM_ACCEPT) && + !hci_blacklist_lookup(hdev, &ev->bdaddr)) { /* Connection accepted */ struct inquiry_entry *ie; struct hci_conn *conn; @@ -2101,7 +2102,8 @@ static inline void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, struct hci_dev_lock(hdev); if ((skb->len - 1) / num_rsp != sizeof(struct inquiry_info_with_rssi)) { - struct inquiry_info_with_rssi_and_pscan_mode *info = (void *) (skb->data + 1); + struct inquiry_info_with_rssi_and_pscan_mode *info; + info = (void *) (skb->data + 1); for (; num_rsp; num_rsp--) { bacpy(&data.bdaddr, &info->bdaddr); @@ -2261,12 +2263,12 @@ static inline void hci_extended_inquiry_result_evt(struct hci_dev *hdev, struct for (; num_rsp; num_rsp--) { bacpy(&data.bdaddr, &info->bdaddr); - data.pscan_rep_mode = info->pscan_rep_mode; - data.pscan_period_mode = info->pscan_period_mode; - data.pscan_mode = 0x00; + data.pscan_rep_mode = info->pscan_rep_mode; + data.pscan_period_mode = info->pscan_period_mode; + data.pscan_mode = 0x00; memcpy(data.dev_class, info->dev_class, 3); - data.clock_offset = info->clock_offset; - data.rssi = info->rssi; + data.clock_offset = info->clock_offset; + data.rssi = info->rssi; data.ssp_mode = 0x01; 
info++; hci_inquiry_cache_update(hdev, &data); -- cgit v1.1 From 9a279ea3a77ebcc91b68f0546e7cfa5018a12513 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 15 Feb 2011 16:59:16 +0000 Subject: ethtool: move EXPORT_SYMBOL(ethtool_op_set_tx_csum) to correct place MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 5984ee0..9eb8277 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -55,6 +55,7 @@ int ethtool_op_set_tx_csum(struct net_device *dev, u32 data) return 0; } +EXPORT_SYMBOL(ethtool_op_set_tx_csum); int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data) { @@ -1124,7 +1125,6 @@ static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr) return dev->ethtool_ops->set_tx_csum(dev, edata.data); } -EXPORT_SYMBOL(ethtool_op_set_tx_csum); static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) { -- cgit v1.1 From 212b573f5552c60265da721ff9ce32e3462a2cdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 15 Feb 2011 16:59:16 +0000 Subject: ethtool: enable GSO and GRO by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/dev.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 4580460..8686f6f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5274,6 +5274,12 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) features &= ~NETIF_F_TSO; } + /* Software GSO depends on SG.
*/ + if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) { + netdev_info(dev, "Dropping NETIF_F_GSO since no SG feature.\n"); + features &= ~NETIF_F_GSO; + } + /* UFO needs SG and checksumming */ if (features & NETIF_F_UFO) { /* maybe split UFO into V4 and V6? */ @@ -5430,11 +5436,15 @@ int register_netdevice(struct net_device *dev) if (dev->iflink == -1) dev->iflink = dev->ifindex; - dev->features = netdev_fix_features(dev, dev->features); + /* Enable software offloads by default - will be stripped in + * netdev_fix_features() if not supported. */ + dev->features |= NETIF_F_SOFT_FEATURES; - /* Enable software GSO if SG is supported. */ - if (dev->features & NETIF_F_SG) - dev->features |= NETIF_F_GSO; + /* Avoid warning from netdev_fix_features() for GSO without SG */ + if (!(dev->features & NETIF_F_SG)) + dev->features &= ~NETIF_F_GSO; + + dev->features = netdev_fix_features(dev, dev->features); /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, * vlan_dev_init() will do the dev->features check, so these features -- cgit v1.1 From 340ae1654c0667e0cdd2a6d4dc16f7946e018881 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 15 Feb 2011 16:59:16 +0000 Subject: ethtool: factorize ethtool_get_strings() and ethtool_get_sset_count() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is needed for unified offloads patch. Signed-off-by: Michał Mirosław Signed-off-by: David S.
Miller --- net/core/ethtool.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 9eb8277..85aaeab 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -172,6 +172,25 @@ EXPORT_SYMBOL(ethtool_ntuple_flush); /* Handlers for each ethtool command */ +static int __ethtool_get_sset_count(struct net_device *dev, int sset) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (ops && ops->get_sset_count && ops->get_strings) + return ops->get_sset_count(dev, sset); + else + return -EOPNOTSUPP; +} + +static void __ethtool_get_strings(struct net_device *dev, + u32 stringset, u8 *data) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + + /* ops->get_strings is valid because checked earlier */ + ops->get_strings(dev, stringset, data); +} + static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) { struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; @@ -252,14 +271,10 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, void __user *useraddr) { struct ethtool_sset_info info; - const struct ethtool_ops *ops = dev->ethtool_ops; u64 sset_mask; int i, idx = 0, n_bits = 0, ret, rc; u32 *info_buf = NULL; - if (!ops->get_sset_count) - return -EOPNOTSUPP; - if (copy_from_user(&info, useraddr, sizeof(info))) return -EFAULT; @@ -286,7 +301,7 @@ static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, if (!(sset_mask & (1ULL << i))) continue; - rc = ops->get_sset_count(dev, i); + rc = __ethtool_get_sset_count(dev, i); if (rc >= 0) { info.sset_mask |= (1ULL << i); info_buf[idx++] = rc; @@ -1287,17 +1302,13 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr) static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) { struct ethtool_gstrings gstrings; - const struct ethtool_ops *ops = dev->ethtool_ops; u8 *data; int ret; - if (!ops->get_strings 
|| !ops->get_sset_count) - return -EOPNOTSUPP; - if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) return -EFAULT; - ret = ops->get_sset_count(dev, gstrings.string_set); + ret = __ethtool_get_sset_count(dev, gstrings.string_set); if (ret < 0) return ret; @@ -1307,7 +1318,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) if (!data) return -ENOMEM; - ops->get_strings(dev, gstrings.string_set, data); + __ethtool_get_strings(dev, gstrings.string_set, data); ret = -EFAULT; if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) @@ -1317,7 +1328,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) goto out; ret = 0; - out: +out: kfree(data); return ret; } -- cgit v1.1 From 0a417704777ed29d0e8c72b7274a328e61248e75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 15 Feb 2011 16:59:17 +0000 Subject: ethtool: factorize get/set_one_feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows to enable GRO even if RX csum is disabled. GRO will not be used for packets without hardware checksum anyway. Signed-off-by: Michał Mirosław Signed-off-by: David S.
Miller --- net/core/ethtool.c | 274 ++++++++++++++++++++++++++--------------------------- 1 file changed, 132 insertions(+), 142 deletions(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 85aaeab..c3fb8f9 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -191,6 +191,109 @@ static void __ethtool_get_strings(struct net_device *dev, ops->get_strings(dev, stringset, data); } +static u32 ethtool_get_feature_mask(u32 eth_cmd) +{ + /* feature masks of legacy discrete ethtool ops */ + + switch (eth_cmd) { + case ETHTOOL_GTXCSUM: + case ETHTOOL_STXCSUM: + return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CSUM; + case ETHTOOL_GSG: + case ETHTOOL_SSG: + return NETIF_F_SG; + case ETHTOOL_GTSO: + case ETHTOOL_STSO: + return NETIF_F_ALL_TSO; + case ETHTOOL_GUFO: + case ETHTOOL_SUFO: + return NETIF_F_UFO; + case ETHTOOL_GGSO: + case ETHTOOL_SGSO: + return NETIF_F_GSO; + case ETHTOOL_GGRO: + case ETHTOOL_SGRO: + return NETIF_F_GRO; + default: + BUG(); + } +} + +static void *__ethtool_get_one_feature_actor(struct net_device *dev, u32 ethcmd) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (!ops) + return NULL; + + switch (ethcmd) { + case ETHTOOL_GTXCSUM: + return ops->get_tx_csum; + case ETHTOOL_SSG: + return ops->get_sg; + case ETHTOOL_STSO: + return ops->get_tso; + case ETHTOOL_SUFO: + return ops->get_ufo; + default: + return NULL; + } +} + +static int ethtool_get_one_feature(struct net_device *dev, + char __user *useraddr, u32 ethcmd) +{ + struct ethtool_value edata = { + .cmd = ethcmd, + .data = !!(dev->features & ethtool_get_feature_mask(ethcmd)), + }; + u32 (*actor)(struct net_device *); + + actor = __ethtool_get_one_feature_actor(dev, ethcmd); + if (actor) + edata.data = actor(dev); + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + +static int __ethtool_set_tx_csum(struct net_device *dev, u32 data); +static int __ethtool_set_sg(struct net_device *dev, u32 data); +static int 
__ethtool_set_tso(struct net_device *dev, u32 data); +static int __ethtool_set_ufo(struct net_device *dev, u32 data); + +static int ethtool_set_one_feature(struct net_device *dev, + void __user *useraddr, u32 ethcmd) +{ + struct ethtool_value edata; + u32 mask; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + switch (ethcmd) { + case ETHTOOL_STXCSUM: + return __ethtool_set_tx_csum(dev, edata.data); + case ETHTOOL_SSG: + return __ethtool_set_sg(dev, edata.data); + case ETHTOOL_STSO: + return __ethtool_set_tso(dev, edata.data); + case ETHTOOL_SUFO: + return __ethtool_set_ufo(dev, edata.data); + case ETHTOOL_SGSO: + case ETHTOOL_SGRO: + mask = ethtool_get_feature_mask(ethcmd); + if (edata.data) + dev->features |= mask; + else + dev->features &= ~mask; + return 0; + default: + return -EOPNOTSUPP; + } +} + static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) { struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; @@ -1107,6 +1210,9 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data) { int err; + if (data && !(dev->features & NETIF_F_ALL_CSUM)) + return -EINVAL; + if (!data && dev->ethtool_ops->set_tso) { err = dev->ethtool_ops->set_tso(dev, 0); if (err) @@ -1121,24 +1227,20 @@ static int __ethtool_set_sg(struct net_device *dev, u32 data) return dev->ethtool_ops->set_sg(dev, data); } -static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr) +static int __ethtool_set_tx_csum(struct net_device *dev, u32 data) { - struct ethtool_value edata; int err; if (!dev->ethtool_ops->set_tx_csum) return -EOPNOTSUPP; - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - if (!edata.data && dev->ethtool_ops->set_sg) { + if (!data && dev->ethtool_ops->set_sg) { err = __ethtool_set_sg(dev, 0); if (err) return err; } - return dev->ethtool_ops->set_tx_csum(dev, edata.data); + return dev->ethtool_ops->set_tx_csum(dev, data); } static int ethtool_set_rx_csum(struct net_device *dev, char 
__user *useraddr) @@ -1157,108 +1259,28 @@ static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) return dev->ethtool_ops->set_rx_csum(dev, edata.data); } -static int ethtool_set_sg(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata; - - if (!dev->ethtool_ops->set_sg) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - if (edata.data && - !(dev->features & NETIF_F_ALL_CSUM)) - return -EINVAL; - - return __ethtool_set_sg(dev, edata.data); -} - -static int ethtool_set_tso(struct net_device *dev, char __user *useraddr) +static int __ethtool_set_tso(struct net_device *dev, u32 data) { - struct ethtool_value edata; - if (!dev->ethtool_ops->set_tso) return -EOPNOTSUPP; - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - if (edata.data && !(dev->features & NETIF_F_SG)) + if (data && !(dev->features & NETIF_F_SG)) return -EINVAL; - return dev->ethtool_ops->set_tso(dev, edata.data); + return dev->ethtool_ops->set_tso(dev, data); } -static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr) +static int __ethtool_set_ufo(struct net_device *dev, u32 data) { - struct ethtool_value edata; - if (!dev->ethtool_ops->set_ufo) return -EOPNOTSUPP; - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - if (edata.data && !(dev->features & NETIF_F_SG)) + if (data && !(dev->features & NETIF_F_SG)) return -EINVAL; - if (edata.data && !((dev->features & NETIF_F_GEN_CSUM) || + if (data && !((dev->features & NETIF_F_GEN_CSUM) || (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) return -EINVAL; - return dev->ethtool_ops->set_ufo(dev, edata.data); -} - -static int ethtool_get_gso(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GGSO }; - - edata.data = dev->features & NETIF_F_GSO; - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - 
return 0; -} - -static int ethtool_set_gso(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - if (edata.data) - dev->features |= NETIF_F_GSO; - else - dev->features &= ~NETIF_F_GSO; - return 0; -} - -static int ethtool_get_gro(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GGRO }; - - edata.data = dev->features & NETIF_F_GRO; - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_gro(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - if (edata.data) { - u32 rxcsum = dev->ethtool_ops->get_rx_csum ? - dev->ethtool_ops->get_rx_csum(dev) : - ethtool_op_get_rx_csum(dev); - - if (!rxcsum) - return -EINVAL; - dev->features |= NETIF_F_GRO; - } else - dev->features &= ~NETIF_F_GRO; - - return 0; + return dev->ethtool_ops->set_ufo(dev, data); } static int ethtool_self_test(struct net_device *dev, char __user *useraddr) @@ -1590,33 +1612,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXCSUM: rc = ethtool_set_rx_csum(dev, useraddr); break; - case ETHTOOL_GTXCSUM: - rc = ethtool_get_value(dev, useraddr, ethcmd, - (dev->ethtool_ops->get_tx_csum ? - dev->ethtool_ops->get_tx_csum : - ethtool_op_get_tx_csum)); - break; - case ETHTOOL_STXCSUM: - rc = ethtool_set_tx_csum(dev, useraddr); - break; - case ETHTOOL_GSG: - rc = ethtool_get_value(dev, useraddr, ethcmd, - (dev->ethtool_ops->get_sg ? - dev->ethtool_ops->get_sg : - ethtool_op_get_sg)); - break; - case ETHTOOL_SSG: - rc = ethtool_set_sg(dev, useraddr); - break; - case ETHTOOL_GTSO: - rc = ethtool_get_value(dev, useraddr, ethcmd, - (dev->ethtool_ops->get_tso ? 
- dev->ethtool_ops->get_tso : - ethtool_op_get_tso)); - break; - case ETHTOOL_STSO: - rc = ethtool_set_tso(dev, useraddr); - break; case ETHTOOL_TEST: rc = ethtool_self_test(dev, useraddr); break; @@ -1632,21 +1627,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GPERMADDR: rc = ethtool_get_perm_addr(dev, useraddr); break; - case ETHTOOL_GUFO: - rc = ethtool_get_value(dev, useraddr, ethcmd, - (dev->ethtool_ops->get_ufo ? - dev->ethtool_ops->get_ufo : - ethtool_op_get_ufo)); - break; - case ETHTOOL_SUFO: - rc = ethtool_set_ufo(dev, useraddr); - break; - case ETHTOOL_GGSO: - rc = ethtool_get_gso(dev, useraddr); - break; - case ETHTOOL_SGSO: - rc = ethtool_set_gso(dev, useraddr); - break; case ETHTOOL_GFLAGS: rc = ethtool_get_value(dev, useraddr, ethcmd, (dev->ethtool_ops->get_flags ? @@ -1677,12 +1657,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXCLSRLINS: rc = ethtool_set_rxnfc(dev, ethcmd, useraddr); break; - case ETHTOOL_GGRO: - rc = ethtool_get_gro(dev, useraddr); - break; - case ETHTOOL_SGRO: - rc = ethtool_set_gro(dev, useraddr); - break; case ETHTOOL_FLASHDEV: rc = ethtool_flash_device(dev, useraddr); break; @@ -1704,6 +1678,22 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXFHINDIR: rc = ethtool_set_rxfh_indir(dev, useraddr); break; + case ETHTOOL_GTXCSUM: + case ETHTOOL_GSG: + case ETHTOOL_GTSO: + case ETHTOOL_GUFO: + case ETHTOOL_GGSO: + case ETHTOOL_GGRO: + rc = ethtool_get_one_feature(dev, useraddr, ethcmd); + break; + case ETHTOOL_STXCSUM: + case ETHTOOL_SSG: + case ETHTOOL_STSO: + case ETHTOOL_SUFO: + case ETHTOOL_SGSO: + case ETHTOOL_SGRO: + rc = ethtool_set_one_feature(dev, useraddr, ethcmd); + break; default: rc = -EOPNOTSUPP; } -- cgit v1.1 From 5455c6998d34dc983a8693500e4dffefc3682dc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 15 Feb 2011 16:59:17 +0000 Subject: net: Introduce new feature setting ops MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This introduces a new framework to handle device features setting. It consists of: - new fields in struct net_device: + hw_features - features that hw/driver supports toggling + wanted_features - features that user wants enabled, when possible - new netdev_ops: + feat = ndo_fix_features(dev, feat) - API checking constraints for enabling features or their combinations + ndo_set_features(dev) - API updating hardware state to match changed dev->features - new ethtool commands: + ETHTOOL_GFEATURES/ETHTOOL_SFEATURES: get/set dev->wanted_features and trigger device reconfiguration if resulting dev->features changed + ETHTOOL_GSTRINGS(ETH_SS_FEATURES): get feature bits names (meaning) Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/dev.c | 46 +++++++++++++++++--- net/core/ethtool.c | 125 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 163 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 8686f6f..4f69439 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5302,6 +5302,37 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) } EXPORT_SYMBOL(netdev_fix_features); +void netdev_update_features(struct net_device *dev) +{ + u32 features; + int err = 0; + + features = netdev_get_wanted_features(dev); + + if (dev->netdev_ops->ndo_fix_features) + features = dev->netdev_ops->ndo_fix_features(dev, features); + + /* driver might be less strict about feature dependencies */ + features = netdev_fix_features(dev, features); + + if (dev->features == features) + return; + + netdev_info(dev, "Features changed: 0x%08x -> 0x%08x\n", + dev->features, features); + + if (dev->netdev_ops->ndo_set_features) + err = dev->netdev_ops->ndo_set_features(dev, features); + + if (!err) + dev->features = features; + else if (err < 0) + netdev_err(dev, + "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n", + err, features, 
dev->features); +} +EXPORT_SYMBOL(netdev_update_features); + /** * netif_stacked_transfer_operstate - transfer operstate * @rootdev: the root or lower level device to transfer state from @@ -5436,15 +5467,18 @@ int register_netdevice(struct net_device *dev) if (dev->iflink == -1) dev->iflink = dev->ifindex; - /* Enable software offloads by default - will be stripped in - * netdev_fix_features() if not supported. */ - dev->features |= NETIF_F_SOFT_FEATURES; + /* Transfer changeable features to wanted_features and enable + * software offloads (GSO and GRO). + */ + dev->hw_features |= NETIF_F_SOFT_FEATURES; + dev->wanted_features = (dev->features & dev->hw_features) + | NETIF_F_SOFT_FEATURES; /* Avoid warning from netdev_fix_features() for GSO without SG */ - if (!(dev->features & NETIF_F_SG)) - dev->features &= ~NETIF_F_GSO; + if (!(dev->wanted_features & NETIF_F_SG)) + dev->wanted_features &= ~NETIF_F_GSO; - dev->features = netdev_fix_features(dev, dev->features); + netdev_update_features(dev); /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, * vlan_dev_init() will do the dev->features check, so these features diff --git a/net/core/ethtool.c b/net/core/ethtool.c index c3fb8f9..9577396 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -172,10 +172,120 @@ EXPORT_SYMBOL(ethtool_ntuple_flush); /* Handlers for each ethtool command */ +#define ETHTOOL_DEV_FEATURE_WORDS 1 + +static int ethtool_get_features(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_gfeatures cmd = { + .cmd = ETHTOOL_GFEATURES, + .size = ETHTOOL_DEV_FEATURE_WORDS, + }; + struct ethtool_get_features_block features[ETHTOOL_DEV_FEATURE_WORDS] = { + { + .available = dev->hw_features, + .requested = dev->wanted_features, + .active = dev->features, + .never_changed = NETIF_F_NEVER_CHANGE, + }, + }; + u32 __user *sizeaddr; + u32 copy_size; + + sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size); + if (get_user(copy_size, sizeaddr)) + return -EFAULT; + + if 
(copy_size > ETHTOOL_DEV_FEATURE_WORDS) + copy_size = ETHTOOL_DEV_FEATURE_WORDS; + + if (copy_to_user(useraddr, &cmd, sizeof(cmd))) + return -EFAULT; + useraddr += sizeof(cmd); + if (copy_to_user(useraddr, features, copy_size * sizeof(*features))) + return -EFAULT; + + return 0; +} + +static int ethtool_set_features(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_sfeatures cmd; + struct ethtool_set_features_block features[ETHTOOL_DEV_FEATURE_WORDS]; + int ret = 0; + + if (copy_from_user(&cmd, useraddr, sizeof(cmd))) + return -EFAULT; + useraddr += sizeof(cmd); + + if (cmd.size != ETHTOOL_DEV_FEATURE_WORDS) + return -EINVAL; + + if (copy_from_user(features, useraddr, sizeof(features))) + return -EFAULT; + + if (features[0].valid & ~NETIF_F_ETHTOOL_BITS) + return -EINVAL; + + if (features[0].valid & ~dev->hw_features) { + features[0].valid &= dev->hw_features; + ret |= ETHTOOL_F_UNSUPPORTED; + } + + dev->wanted_features &= ~features[0].valid; + dev->wanted_features |= features[0].valid & features[0].requested; + netdev_update_features(dev); + + if ((dev->wanted_features ^ dev->features) & features[0].valid) + ret |= ETHTOOL_F_WISH; + + return ret; +} + +static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GSTRING_LEN] = { + /* NETIF_F_SG */ "tx-scatter-gather", + /* NETIF_F_IP_CSUM */ "tx-checksum-ipv4", + /* NETIF_F_NO_CSUM */ "tx-checksum-unneeded", + /* NETIF_F_HW_CSUM */ "tx-checksum-ip-generic", + /* NETIF_F_IPV6_CSUM */ "tx_checksum-ipv6", + /* NETIF_F_HIGHDMA */ "highdma", + /* NETIF_F_FRAGLIST */ "tx-scatter-gather-fraglist", + /* NETIF_F_HW_VLAN_TX */ "tx-vlan-hw-insert", + + /* NETIF_F_HW_VLAN_RX */ "rx-vlan-hw-parse", + /* NETIF_F_HW_VLAN_FILTER */ "rx-vlan-filter", + /* NETIF_F_VLAN_CHALLENGED */ "vlan-challenged", + /* NETIF_F_GSO */ "tx-generic-segmentation", + /* NETIF_F_LLTX */ "tx-lockless", + /* NETIF_F_NETNS_LOCAL */ "netns-local", + /* NETIF_F_GRO */ "rx-gro", + /* NETIF_F_LRO */ "rx-lro", + + /* 
NETIF_F_TSO */ "tx-tcp-segmentation", + /* NETIF_F_UFO */ "tx-udp-fragmentation", + /* NETIF_F_GSO_ROBUST */ "tx-gso-robust", + /* NETIF_F_TSO_ECN */ "tx-tcp-ecn-segmentation", + /* NETIF_F_TSO6 */ "tx-tcp6-segmentation", + /* NETIF_F_FSO */ "tx-fcoe-segmentation", + "", + "", + + /* NETIF_F_FCOE_CRC */ "tx-checksum-fcoe-crc", + /* NETIF_F_SCTP_CSUM */ "tx-checksum-sctp", + /* NETIF_F_FCOE_MTU */ "fcoe-mtu", + /* NETIF_F_NTUPLE */ "rx-ntuple-filter", + /* NETIF_F_RXHASH */ "rx-hashing", + "", + "", + "", +}; + static int __ethtool_get_sset_count(struct net_device *dev, int sset) { const struct ethtool_ops *ops = dev->ethtool_ops; + if (sset == ETH_SS_FEATURES) + return ARRAY_SIZE(netdev_features_strings); + if (ops && ops->get_sset_count && ops->get_strings) return ops->get_sset_count(dev, sset); else @@ -187,8 +297,12 @@ static void __ethtool_get_strings(struct net_device *dev, { const struct ethtool_ops *ops = dev->ethtool_ops; - /* ops->get_strings is valid because checked earlier */ - ops->get_strings(dev, stringset, data); + if (stringset == ETH_SS_FEATURES) + memcpy(data, netdev_features_strings, + sizeof(netdev_features_strings)); + else + /* ops->get_strings is valid because checked earlier */ + ops->get_strings(dev, stringset, data); } static u32 ethtool_get_feature_mask(u32 eth_cmd) @@ -1533,6 +1647,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXCLSRLCNT: case ETHTOOL_GRXCLSRULE: case ETHTOOL_GRXCLSRLALL: + case ETHTOOL_GFEATURES: break; default: if (!capable(CAP_NET_ADMIN)) @@ -1678,6 +1793,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXFHINDIR: rc = ethtool_set_rxfh_indir(dev, useraddr); break; + case ETHTOOL_GFEATURES: + rc = ethtool_get_features(dev, useraddr); + break; + case ETHTOOL_SFEATURES: + rc = ethtool_set_features(dev, useraddr); + break; case ETHTOOL_GTXCSUM: case ETHTOOL_GSG: case ETHTOOL_GTSO: -- cgit v1.1 From 86794881c29a7ea6271644b49ad81518cabda96b Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 15 Feb 2011 16:59:17 +0000 Subject: net: ethtool: use ndo_fix_features for offload setting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/ethtool.c | 45 ++++++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 9577396..6599997 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -357,15 +357,21 @@ static void *__ethtool_get_one_feature_actor(struct net_device *dev, u32 ethcmd) static int ethtool_get_one_feature(struct net_device *dev, char __user *useraddr, u32 ethcmd) { + u32 mask = ethtool_get_feature_mask(ethcmd); struct ethtool_value edata = { .cmd = ethcmd, - .data = !!(dev->features & ethtool_get_feature_mask(ethcmd)), + .data = !!(dev->features & mask), }; - u32 (*actor)(struct net_device *); - actor = __ethtool_get_one_feature_actor(dev, ethcmd); - if (actor) - edata.data = actor(dev); + /* compatibility with discrete get_ ops */ + if (!(dev->hw_features & mask)) { + u32 (*actor)(struct net_device *); + + actor = __ethtool_get_one_feature_actor(dev, ethcmd); + + if (actor) + edata.data = actor(dev); + } if (copy_to_user(useraddr, &edata, sizeof(edata))) return -EFAULT; @@ -386,6 +392,27 @@ static int ethtool_set_one_feature(struct net_device *dev, if (copy_from_user(&edata, useraddr, sizeof(edata))) return -EFAULT; + mask = ethtool_get_feature_mask(ethcmd); + mask &= dev->hw_features; + if (mask) { + if (edata.data) + dev->wanted_features |= mask; + else + dev->wanted_features &= ~mask; + + netdev_update_features(dev); + return 0; + } + + /* Driver is not converted to ndo_fix_features or does not + * support changing this offload. In the latter case it won't + * have corresponding ethtool_ops field set. 
+ * + * Following part is to be removed after all drivers advertise + * their changeable features in netdev->hw_features and stop + * using discrete offload setting ops. + */ + switch (ethcmd) { case ETHTOOL_STXCSUM: return __ethtool_set_tx_csum(dev, edata.data); @@ -395,14 +422,6 @@ static int ethtool_set_one_feature(struct net_device *dev, return __ethtool_set_tso(dev, edata.data); case ETHTOOL_SUFO: return __ethtool_set_ufo(dev, edata.data); - case ETHTOOL_SGSO: - case ETHTOOL_SGRO: - mask = ethtool_get_feature_mask(ethcmd); - if (edata.data) - dev->features |= mask; - else - dev->features &= ~mask; - return 0; default: return -EOPNOTSUPP; } -- cgit v1.1 From da8ac86c4a56a14bf8deea7d2f92d0a453c67f91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 15 Feb 2011 16:59:18 +0000 Subject: net: use ndo_fix_features for ethtool_ops->set_flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/ethtool.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 6599997..65b3d50 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -427,6 +427,34 @@ static int ethtool_set_one_feature(struct net_device *dev, } } +static int __ethtool_set_flags(struct net_device *dev, u32 data) +{ + u32 changed; + + if (data & ~flags_dup_features) + return -EINVAL; + + /* legacy set_flags() op */ + if (dev->ethtool_ops->set_flags) { + if (unlikely(dev->hw_features & flags_dup_features)) + netdev_warn(dev, + "driver BUG: mixed hw_features and set_flags()\n"); + return dev->ethtool_ops->set_flags(dev, data); + } + + /* allow changing only bits set in hw_features */ + changed = (data ^ dev->wanted_features) & flags_dup_features; + if (changed & ~dev->hw_features) + return (changed & dev->hw_features) ? 
-EINVAL : -EOPNOTSUPP; + + dev->wanted_features = + (dev->wanted_features & ~changed) | data; + + netdev_update_features(dev); + + return 0; +} + static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) { struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; @@ -1768,8 +1796,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) ethtool_op_get_flags)); break; case ETHTOOL_SFLAGS: - rc = ethtool_set_value(dev, useraddr, - dev->ethtool_ops->set_flags); + rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags); break; case ETHTOOL_GPFLAGS: rc = ethtool_get_value(dev, useraddr, ethcmd, -- cgit v1.1 From e83d360d9a7e5d71d55c13e96b19109a2ea23bf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 15 Feb 2011 16:59:18 +0000 Subject: net: introduce NETIF_F_RXCSUM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce NETIF_F_RXCSUM to replace device-private flags for RX checksum offload. Integrate it with ndo_fix_features. ethtool_op_get_rx_csum() is removed altogether as nothing in-tree uses it. Signed-off-by: Michał Mirosław Signed-off-by: David S. 
Miller --- net/core/ethtool.c | 47 +++++++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 65b3d50..66cdc76 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -34,12 +34,6 @@ u32 ethtool_op_get_link(struct net_device *dev) } EXPORT_SYMBOL(ethtool_op_get_link); -u32 ethtool_op_get_rx_csum(struct net_device *dev) -{ - return (dev->features & NETIF_F_ALL_CSUM) != 0; -} -EXPORT_SYMBOL(ethtool_op_get_rx_csum); - u32 ethtool_op_get_tx_csum(struct net_device *dev) { return (dev->features & NETIF_F_ALL_CSUM) != 0; @@ -274,7 +268,7 @@ static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GS /* NETIF_F_FCOE_MTU */ "fcoe-mtu", /* NETIF_F_NTUPLE */ "rx-ntuple-filter", /* NETIF_F_RXHASH */ "rx-hashing", - "", + /* NETIF_F_RXCSUM */ "rx-checksum", "", "", }; @@ -313,6 +307,9 @@ static u32 ethtool_get_feature_mask(u32 eth_cmd) case ETHTOOL_GTXCSUM: case ETHTOOL_STXCSUM: return NETIF_F_ALL_CSUM | NETIF_F_SCTP_CSUM; + case ETHTOOL_GRXCSUM: + case ETHTOOL_SRXCSUM: + return NETIF_F_RXCSUM; case ETHTOOL_GSG: case ETHTOOL_SSG: return NETIF_F_SG; @@ -343,6 +340,8 @@ static void *__ethtool_get_one_feature_actor(struct net_device *dev, u32 ethcmd) switch (ethcmd) { case ETHTOOL_GTXCSUM: return ops->get_tx_csum; + case ETHTOOL_GRXCSUM: + return ops->get_rx_csum; case ETHTOOL_SSG: return ops->get_sg; case ETHTOOL_STSO: @@ -354,6 +353,11 @@ static void *__ethtool_get_one_feature_actor(struct net_device *dev, u32 ethcmd) } } +static u32 __ethtool_get_rx_csum_oldbug(struct net_device *dev) +{ + return !!(dev->features & NETIF_F_ALL_CSUM); +} + static int ethtool_get_one_feature(struct net_device *dev, char __user *useraddr, u32 ethcmd) { @@ -369,6 +373,10 @@ static int ethtool_get_one_feature(struct net_device *dev, actor = __ethtool_get_one_feature_actor(dev, ethcmd); + /* bug compatibility with old get_rx_csum */ + if (ethcmd == 
ETHTOOL_GRXCSUM && !actor) + actor = __ethtool_get_rx_csum_oldbug; + if (actor) edata.data = actor(dev); } @@ -379,6 +387,7 @@ static int ethtool_get_one_feature(struct net_device *dev, } static int __ethtool_set_tx_csum(struct net_device *dev, u32 data); +static int __ethtool_set_rx_csum(struct net_device *dev, u32 data); static int __ethtool_set_sg(struct net_device *dev, u32 data); static int __ethtool_set_tso(struct net_device *dev, u32 data); static int __ethtool_set_ufo(struct net_device *dev, u32 data); @@ -416,6 +425,8 @@ static int ethtool_set_one_feature(struct net_device *dev, switch (ethcmd) { case ETHTOOL_STXCSUM: return __ethtool_set_tx_csum(dev, edata.data); + case ETHTOOL_SRXCSUM: + return __ethtool_set_rx_csum(dev, edata.data); case ETHTOOL_SSG: return __ethtool_set_sg(dev, edata.data); case ETHTOOL_STSO: @@ -1404,20 +1415,15 @@ static int __ethtool_set_tx_csum(struct net_device *dev, u32 data) return dev->ethtool_ops->set_tx_csum(dev, data); } -static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) +static int __ethtool_set_rx_csum(struct net_device *dev, u32 data) { - struct ethtool_value edata; - if (!dev->ethtool_ops->set_rx_csum) return -EOPNOTSUPP; - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - if (!edata.data && dev->ethtool_ops->set_sg) + if (!data) dev->features &= ~NETIF_F_GRO; - return dev->ethtool_ops->set_rx_csum(dev, edata.data); + return dev->ethtool_ops->set_rx_csum(dev, data); } static int __ethtool_set_tso(struct net_device *dev, u32 data) @@ -1765,15 +1771,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SPAUSEPARAM: rc = ethtool_set_pauseparam(dev, useraddr); break; - case ETHTOOL_GRXCSUM: - rc = ethtool_get_value(dev, useraddr, ethcmd, - (dev->ethtool_ops->get_rx_csum ? 
- dev->ethtool_ops->get_rx_csum : - ethtool_op_get_rx_csum)); - break; - case ETHTOOL_SRXCSUM: - rc = ethtool_set_rx_csum(dev, useraddr); - break; case ETHTOOL_TEST: rc = ethtool_self_test(dev, useraddr); break; @@ -1846,6 +1843,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) rc = ethtool_set_features(dev, useraddr); break; case ETHTOOL_GTXCSUM: + case ETHTOOL_GRXCSUM: case ETHTOOL_GSG: case ETHTOOL_GTSO: case ETHTOOL_GUFO: @@ -1854,6 +1852,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) rc = ethtool_get_one_feature(dev, useraddr, ethcmd); break; case ETHTOOL_STXCSUM: + case ETHTOOL_SRXCSUM: case ETHTOOL_SSG: case ETHTOOL_STSO: case ETHTOOL_SUFO: -- cgit v1.1 From e2174ca430ec52375a02ed20859aeceb0d455b72 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Thu, 17 Feb 2011 19:16:55 -0300 Subject: Bluetooth: fix errors reported by checkpatch.pl Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_core.c | 10 +++------- net/bluetooth/l2cap_sock.c | 30 +++++++++++++++--------------- 2 files changed, 18 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index bd31367..efcef0d 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1671,10 +1671,6 @@ done: break; } - /* FIXME: Need actual value of the flush timeout */ - //if (flush_to != L2CAP_DEFAULT_FLUSH_TO) - // l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2, pi->flush_to); - req->dcid = cpu_to_le16(pi->dcid); req->flags = cpu_to_le16(0); @@ -2501,7 +2497,7 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cm return 0; } -static int inline l2cap_check_conn_param(u16 min, u16 max, u16 latency, +static inline int l2cap_check_conn_param(u16 min, u16 max, u16 latency, u16 to_multiplier) { u16 max_latency; @@ -2539,8 +2535,8 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, return -EPROTO; req = (struct l2cap_conn_param_update_req *) data; - min = 
__le16_to_cpu(req->min); - max = __le16_to_cpu(req->max); + min = __le16_to_cpu(req->min); + max = __le16_to_cpu(req->max); latency = __le16_to_cpu(req->latency); to_multiplier = __le16_to_cpu(req->to_multiplier); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 484e717..fc85e7a 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1127,30 +1127,30 @@ static const struct net_proto_family l2cap_sock_family_ops = { int __init l2cap_init_sockets(void) { - int err; + int err; - err = proto_register(&l2cap_proto, 0); - if (err < 0) - return err; + err = proto_register(&l2cap_proto, 0); + if (err < 0) + return err; - err = bt_sock_register(BTPROTO_L2CAP, &l2cap_sock_family_ops); - if (err < 0) - goto error; + err = bt_sock_register(BTPROTO_L2CAP, &l2cap_sock_family_ops); + if (err < 0) + goto error; - BT_INFO("L2CAP socket layer initialized"); + BT_INFO("L2CAP socket layer initialized"); - return 0; + return 0; error: - BT_ERR("L2CAP socket registration failed"); - proto_unregister(&l2cap_proto); - return err; + BT_ERR("L2CAP socket registration failed"); + proto_unregister(&l2cap_proto); + return err; } void l2cap_cleanup_sockets(void) { - if (bt_sock_unregister(BTPROTO_L2CAP) < 0) - BT_ERR("L2CAP socket unregistration failed"); + if (bt_sock_unregister(BTPROTO_L2CAP) < 0) + BT_ERR("L2CAP socket unregistration failed"); - proto_unregister(&l2cap_proto); + proto_unregister(&l2cap_proto); } -- cgit v1.1 From 602f9887cdb14851631416d64ca27b48e2dd1f92 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Thu, 17 Feb 2011 19:22:19 -0300 Subject: Bluetooth: Fix errors reported by checkpatch.pl Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/hci_sysfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 23471dd..3c838a6 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -11,7 +11,7 @@ static struct class *bt_class; -struct dentry *bt_debugfs = NULL; +struct dentry *bt_debugfs; EXPORT_SYMBOL_GPL(bt_debugfs); static inline char *link_typetostr(int type) @@ -51,8 +51,8 @@ static ssize_t show_link_features(struct device *dev, struct device_attribute *a conn->features[6], conn->features[7]); } -#define LINK_ATTR(_name,_mode,_show,_store) \ -struct device_attribute link_attr_##_name = __ATTR(_name,_mode,_show,_store) +#define LINK_ATTR(_name, _mode, _show, _store) \ +struct device_attribute link_attr_##_name = __ATTR(_name, _mode, _show, _store) static LINK_ATTR(type, S_IRUGO, show_link_type, NULL); static LINK_ATTR(address, S_IRUGO, show_link_address, NULL); -- cgit v1.1 From 8ffd878419839638d1aea102455b575da39c1a62 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Thu, 17 Feb 2011 19:24:05 -0300 Subject: Bluetooth: fix checkpatch errors in af_bluetooth.c Signed-off-by: Gustavo F. Padovan --- net/bluetooth/af_bluetooth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index c258027..88af9eb 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -397,7 +397,7 @@ static inline unsigned int bt_accept_poll(struct sock *parent) return 0; } -unsigned int bt_sock_poll(struct file * file, struct socket *sock, poll_table *wait) +unsigned int bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; unsigned int mask = 0; -- cgit v1.1 From 5ada552746685d558d0a8e9e979921c75a41e469 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Thu, 17 Feb 2011 15:29:00 -0800 Subject: ipv4: Simplify output route creation call sequence. There's a lot of redundancy and unnecessary stack frames in the output route creation path. 1) Make __mkroute_output() return error pointers. 2) Eliminate ip_mkroute_output() entirely, made possible by #1. 3) Call __mkroute_output() directly and handling the returning error pointers in ip_route_output_slow(). Signed-off-by: David S. Miller --- net/ipv4/route.c | 58 ++++++++++++++++++++++---------------------------------- 1 file changed, 23 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 756f544..849be48 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2323,33 +2323,32 @@ skip_cache: EXPORT_SYMBOL(ip_route_input_common); /* called with rcu_read_lock() */ -static int __mkroute_output(struct rtable **result, - struct fib_result *res, - const struct flowi *fl, - const struct flowi *oldflp, - struct net_device *dev_out, - unsigned flags) +static struct rtable *__mkroute_output(struct fib_result *res, + const struct flowi *fl, + const struct flowi *oldflp, + struct net_device *dev_out, + unsigned int flags) { - struct rtable *rth; - struct in_device *in_dev; u32 tos = RT_FL_TOS(oldflp); + struct in_device *in_dev; + struct rtable *rth; if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK)) - return -EINVAL; + return ERR_PTR(-EINVAL); if (ipv4_is_lbcast(fl->fl4_dst)) res->type = RTN_BROADCAST; else if (ipv4_is_multicast(fl->fl4_dst)) res->type = RTN_MULTICAST; else if (ipv4_is_zeronet(fl->fl4_dst)) - return -EINVAL; + return ERR_PTR(-EINVAL); if (dev_out->flags & IFF_LOOPBACK) flags |= RTCF_LOCAL; in_dev = __in_dev_get_rcu(dev_out); if (!in_dev) - return -EINVAL; + return ERR_PTR(-EINVAL); if (res->type == RTN_BROADCAST) { flags |= RTCF_BROADCAST | RTCF_LOCAL; @@ -2370,7 +2369,7 @@ static int __mkroute_output(struct rtable **result, rth = dst_alloc(&ipv4_dst_ops); if (!rth) - return 
-ENOBUFS; + return ERR_PTR(-ENOBUFS); atomic_set(&rth->dst.__refcnt, 1); rth->dst.flags= DST_HOST; @@ -2425,28 +2424,7 @@ static int __mkroute_output(struct rtable **result, rt_set_nexthop(rth, res, 0); rth->rt_flags = flags; - *result = rth; - return 0; -} - -/* called with rcu_read_lock() */ -static int ip_mkroute_output(struct rtable **rp, - struct fib_result *res, - const struct flowi *fl, - const struct flowi *oldflp, - struct net_device *dev_out, - unsigned flags) -{ - struct rtable *rth = NULL; - int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); - unsigned hash; - if (err == 0) { - hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, - rt_genid(dev_net(dev_out))); - err = rt_intern_hash(hash, rth, rp, NULL, oldflp->oif); - } - - return err; + return rth; } /* @@ -2469,6 +2447,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, struct fib_result res; unsigned int flags = 0; struct net_device *dev_out = NULL; + struct rtable *rth; int err; @@ -2627,7 +2606,16 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, make_route: - err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags); + rth = __mkroute_output(&res, &fl, oldflp, dev_out, flags); + if (IS_ERR(rth)) + err = PTR_ERR(rth); + else { + unsigned int hash; + + hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, + rt_genid(dev_net(dev_out))); + err = rt_intern_hash(hash, rth, rp, NULL, oldflp->oif); + } out: return err; } -- cgit v1.1 From 010c2708e536938a2f84d51d625f603b9a8f80ac Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 17 Feb 2011 15:37:09 -0800 Subject: ipv4: Move rcu_read_{lock,unlock}() into ip_route_output_slow(). Simplifies tail of __ip_route_output_key(). Signed-off-by: David S. 
Miller --- net/ipv4/route.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 849be48..b2b3c9e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2456,6 +2456,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, res.r = NULL; #endif + rcu_read_lock(); if (oldflp->fl4_src) { err = -EINVAL; if (ipv4_is_multicast(oldflp->fl4_src) || @@ -2617,15 +2618,16 @@ make_route: err = rt_intern_hash(hash, rth, rp, NULL, oldflp->oif); } -out: return err; +out: + rcu_read_unlock(); + return err; } int __ip_route_output_key(struct net *net, struct rtable **rp, const struct flowi *flp) { - unsigned int hash; - int res; struct rtable *rth; + unsigned int hash; if (!rt_caching(net)) goto slow_output; @@ -2655,10 +2657,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, rcu_read_unlock_bh(); slow_output: - rcu_read_lock(); - res = ip_route_output_slow(net, rp, flp); - rcu_read_unlock(); - return res; + return ip_route_output_slow(net, rp, flp); } EXPORT_SYMBOL_GPL(__ip_route_output_key); -- cgit v1.1 From 0c4dcd58fd69aded93b0dc6917cd88b262c8aa3f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 17 Feb 2011 15:42:37 -0800 Subject: ipv4: Consolidate ipv4 dst allocation logic. This also allows us to combine all the dst->flags settings and avoid read/modify/write sequences to this struct member. Signed-off-by: David S. 
Miller --- net/ipv4/route.c | 52 +++++++++++++++++++++------------------------------- 1 file changed, 21 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b2b3c9e..79a2871 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1816,6 +1816,21 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) rt->rt_type = res->type; } +static struct rtable *rt_dst_alloc(bool nopolicy, bool noxfrm) +{ + struct rtable *rt = dst_alloc(&ipv4_dst_ops); + if (rt) { + rt->dst.obsolete = -1; + + atomic_set(&rt->dst.__refcnt, 1); + + rt->dst.flags = DST_HOST | + (nopolicy ? DST_NOPOLICY : 0) | + (noxfrm ? DST_NOXFRM : 0); + } + return rt; +} + /* called in rcu_read_lock() section */ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, int our) @@ -1846,17 +1861,12 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (err < 0) goto e_err; } - rth = dst_alloc(&ipv4_dst_ops); + rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false); if (!rth) goto e_nobufs; rth->dst.output = ip_rt_bug; - rth->dst.obsolete = -1; - atomic_set(&rth->dst.__refcnt, 1); - rth->dst.flags= DST_HOST; - if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->dst.flags |= DST_NOPOLICY; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; @@ -1985,19 +1995,13 @@ static int __mkroute_input(struct sk_buff *skb, } } - - rth = dst_alloc(&ipv4_dst_ops); + rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), + IN_DEV_CONF_GET(out_dev, NOXFRM)); if (!rth) { err = -ENOBUFS; goto cleanup; } - atomic_set(&rth->dst.__refcnt, 1); - rth->dst.flags= DST_HOST; - if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->dst.flags |= DST_NOPOLICY; - if (IN_DEV_CONF_GET(out_dev, NOXFRM)) - rth->dst.flags |= DST_NOXFRM; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; @@ -2012,7 +2016,6 @@ static int __mkroute_input(struct sk_buff *skb, 
rth->fl.oif = 0; rth->rt_spec_dst= spec_dst; - rth->dst.obsolete = -1; rth->dst.input = ip_forward; rth->dst.output = ip_output; rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); @@ -2162,18 +2165,13 @@ brd_input: RT_CACHE_STAT_INC(in_brd); local_input: - rth = dst_alloc(&ipv4_dst_ops); + rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), false); if (!rth) goto e_nobufs; rth->dst.output= ip_rt_bug; - rth->dst.obsolete = -1; rth->rt_genid = rt_genid(net); - atomic_set(&rth->dst.__refcnt, 1); - rth->dst.flags= DST_HOST; - if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->dst.flags |= DST_NOPOLICY; rth->fl.fl4_dst = daddr; rth->rt_dst = daddr; rth->fl.fl4_tos = tos; @@ -2366,18 +2364,11 @@ static struct rtable *__mkroute_output(struct fib_result *res, res->fi = NULL; } - - rth = dst_alloc(&ipv4_dst_ops); + rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), + IN_DEV_CONF_GET(in_dev, NOXFRM)); if (!rth) return ERR_PTR(-ENOBUFS); - atomic_set(&rth->dst.__refcnt, 1); - rth->dst.flags= DST_HOST; - if (IN_DEV_CONF_GET(in_dev, NOXFRM)) - rth->dst.flags |= DST_NOXFRM; - if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) - rth->dst.flags |= DST_NOPOLICY; - rth->fl.fl4_dst = oldflp->fl4_dst; rth->fl.fl4_tos = tos; rth->fl.fl4_src = oldflp->fl4_src; @@ -2394,7 +2385,6 @@ static struct rtable *__mkroute_output(struct fib_result *res, rth->rt_spec_dst= fl->fl4_src; rth->dst.output=ip_output; - rth->dst.obsolete = -1; rth->rt_genid = rt_genid(dev_net(dev_out)); RT_CACHE_STAT_INC(out_slow_tot); -- cgit v1.1 From 3c7bd1a14071b99d6535b710bc998ae5d3abbb66 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 16 Feb 2011 14:08:44 -0800 Subject: net: Add initial_ref arg to dst_alloc(). This allows avoiding multiple writes to the initial __refcnt. The simplest cases of wanting an initial reference of "1" in ipv4 and ipv6 have been converted, the rest have been left alone and kept at the existing "0". Signed-off-by: David S. 
Miller --- net/core/dst.c | 4 ++-- net/decnet/dn_route.c | 4 ++-- net/ipv4/route.c | 7 ++----- net/ipv6/route.c | 5 ++--- net/xfrm/xfrm_policy.c | 2 +- 5 files changed, 9 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/core/dst.c b/net/core/dst.c index c1674fd..91104d3 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -166,7 +166,7 @@ EXPORT_SYMBOL(dst_discard); const u32 dst_default_metrics[RTAX_MAX]; -void *dst_alloc(struct dst_ops *ops) +void *dst_alloc(struct dst_ops *ops, int initial_ref) { struct dst_entry *dst; @@ -177,7 +177,7 @@ void *dst_alloc(struct dst_ops *ops) dst = kmem_cache_zalloc(ops->kmem_cachep, GFP_ATOMIC); if (!dst) return NULL; - atomic_set(&dst->__refcnt, 0); + atomic_set(&dst->__refcnt, initial_ref); dst->ops = ops; dst->lastuse = jiffies; dst->path = dst; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 42c9c62..06c054d 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1122,7 +1122,7 @@ make_route: if (dev_out->flags & IFF_LOOPBACK) flags |= RTCF_LOCAL; - rt = dst_alloc(&dn_dst_ops); + rt = dst_alloc(&dn_dst_ops, 0); if (rt == NULL) goto e_nobufs; @@ -1383,7 +1383,7 @@ static int dn_route_input_slow(struct sk_buff *skb) } make_route: - rt = dst_alloc(&dn_dst_ops); + rt = dst_alloc(&dn_dst_ops, 0); if (rt == NULL) goto e_nobufs; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 79a2871..9841543 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1818,12 +1818,10 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) static struct rtable *rt_dst_alloc(bool nopolicy, bool noxfrm) { - struct rtable *rt = dst_alloc(&ipv4_dst_ops); + struct rtable *rt = dst_alloc(&ipv4_dst_ops, 1); if (rt) { rt->dst.obsolete = -1; - atomic_set(&rt->dst.__refcnt, 1); - rt->dst.flags = DST_HOST | (nopolicy ? DST_NOPOLICY : 0) | (noxfrm ? 
DST_NOXFRM : 0); @@ -2679,12 +2677,11 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi { struct rtable *ort = *rp; struct rtable *rt = (struct rtable *) - dst_alloc(&ipv4_dst_blackhole_ops); + dst_alloc(&ipv4_dst_blackhole_ops, 1); if (rt) { struct dst_entry *new = &rt->dst; - atomic_set(&new->__refcnt, 1); new->__use = 1; new->input = dst_discard; new->output = dst_discard; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ad8556e..7946b53 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -221,7 +221,7 @@ static struct rt6_info ip6_blk_hole_entry_template = { /* allocate dst with ip6_dst_ops */ static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops) { - return (struct rt6_info *)dst_alloc(ops); + return (struct rt6_info *)dst_alloc(ops, 0); } static void ip6_dst_destroy(struct dst_entry *dst) @@ -873,13 +873,12 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl { struct rt6_info *ort = (struct rt6_info *) *dstp; struct rt6_info *rt = (struct rt6_info *) - dst_alloc(&ip6_dst_blackhole_ops); + dst_alloc(&ip6_dst_blackhole_ops, 1); struct dst_entry *new = NULL; if (rt) { new = &rt->dst; - atomic_set(&new->__refcnt, 1); new->__use = 1; new->input = dst_discard; new->output = dst_discard; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8b3ef40..3f1257a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1340,7 +1340,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) default: BUG(); } - xdst = dst_alloc(dst_ops) ?: ERR_PTR(-ENOBUFS); + xdst = dst_alloc(dst_ops, 0) ?: ERR_PTR(-ENOBUFS); xfrm_policy_put_afinfo(afinfo); xdst->flo.ops = &xfrm_bundle_fc_ops; -- cgit v1.1 From 3b004569d86d02786ebae496e75dc0b625be3e9a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 16 Feb 2011 14:56:22 -0800 Subject: ipv4: Avoid use of signed integers in fib_trie code. 
GCC emits all kinds of crazy zero extensions when we go from signed int, to unsigned short, etc. etc. This transformation has to be legal because: 1) In tkey_extract_bits() in mask_pfx(), the values are used to perform shifts, on which negative values are undefined by C. 2) In fib_table_lookup() we perform comparisons with unsigned values, constants, and additions. None of which should encounter negative values. Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 1eae90b..edf3b09 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -217,12 +217,12 @@ static inline int tnode_child_length(const struct tnode *tn) return 1 << tn->bits; } -static inline t_key mask_pfx(t_key k, unsigned short l) +static inline t_key mask_pfx(t_key k, unsigned int l) { return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); } -static inline t_key tkey_extract_bits(t_key a, int offset, int bits) +static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits) { if (offset < KEYLENGTH) return ((t_key)(a << offset)) >> (KEYLENGTH - bits); @@ -1378,11 +1378,11 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, int ret; struct rt_trie_node *n; struct tnode *pn; - int pos, bits; + unsigned int pos, bits; t_key key = ntohl(flp->fl4_dst); - int chopped_off; + unsigned int chopped_off; t_key cindex = 0; - int current_prefix_length = KEYLENGTH; + unsigned int current_prefix_length = KEYLENGTH; struct tnode *cn; t_key pref_mismatch; -- cgit v1.1 From 982721f3911b2619482e05910644e5699fbeb065 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 16 Feb 2011 21:44:24 -0800 Subject: ipv4: Use const'ify fib_result deep in the route call chains. The only troublesome bit here is __mkroute_output which wants to override res->fi and res->type, compute those in local variables instead. 
Signed-off-by: David S. Miller --- net/ipv4/fib_rules.c | 2 +- net/ipv4/route.c | 32 +++++++++++++++++--------------- 2 files changed, 18 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 9cefe72..3018efb 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -47,7 +47,7 @@ struct fib4_rule { }; #ifdef CONFIG_IP_ROUTE_CLASSID -u32 fib_rules_tclass(struct fib_result *res) +u32 fib_rules_tclass(const struct fib_result *res) { return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 9841543..2facde0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1787,10 +1787,10 @@ static void rt_init_metrics(struct rtable *rt, struct fib_info *fi) } } -static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) +static void rt_set_nexthop(struct rtable *rt, const struct fib_result *res, + struct fib_info *fi, u16 type, u32 itag) { struct dst_entry *dst = &rt->dst; - struct fib_info *fi = res->fi; if (fi) { if (FIB_RES_GW(*res) && @@ -1813,7 +1813,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) #endif set_class_tag(rt, itag); #endif - rt->rt_type = res->type; + rt->rt_type = type; } static struct rtable *rt_dst_alloc(bool nopolicy, bool noxfrm) @@ -1939,7 +1939,7 @@ static void ip_handle_martian_source(struct net_device *dev, /* called in rcu_read_lock() section */ static int __mkroute_input(struct sk_buff *skb, - struct fib_result *res, + const struct fib_result *res, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos, struct rtable **result) @@ -2018,7 +2018,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->dst.output = ip_output; rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); - rt_set_nexthop(rth, res, itag); + rt_set_nexthop(rth, res, res->fi, res->type, itag); rth->rt_flags = flags; @@ -2319,23 +2319,25 @@ skip_cache: EXPORT_SYMBOL(ip_route_input_common); /* 
called with rcu_read_lock() */ -static struct rtable *__mkroute_output(struct fib_result *res, +static struct rtable *__mkroute_output(const struct fib_result *res, const struct flowi *fl, const struct flowi *oldflp, struct net_device *dev_out, unsigned int flags) { + struct fib_info *fi = res->fi; u32 tos = RT_FL_TOS(oldflp); struct in_device *in_dev; + u16 type = res->type; struct rtable *rth; if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK)) return ERR_PTR(-EINVAL); if (ipv4_is_lbcast(fl->fl4_dst)) - res->type = RTN_BROADCAST; + type = RTN_BROADCAST; else if (ipv4_is_multicast(fl->fl4_dst)) - res->type = RTN_MULTICAST; + type = RTN_MULTICAST; else if (ipv4_is_zeronet(fl->fl4_dst)) return ERR_PTR(-EINVAL); @@ -2346,10 +2348,10 @@ static struct rtable *__mkroute_output(struct fib_result *res, if (!in_dev) return ERR_PTR(-EINVAL); - if (res->type == RTN_BROADCAST) { + if (type == RTN_BROADCAST) { flags |= RTCF_BROADCAST | RTCF_LOCAL; - res->fi = NULL; - } else if (res->type == RTN_MULTICAST) { + fi = NULL; + } else if (type == RTN_MULTICAST) { flags |= RTCF_MULTICAST | RTCF_LOCAL; if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src, oldflp->proto)) @@ -2358,8 +2360,8 @@ static struct rtable *__mkroute_output(struct fib_result *res, * default one, but do not gateway in this case. * Yes, it is hack. 
*/ - if (res->fi && res->prefixlen < 4) - res->fi = NULL; + if (fi && res->prefixlen < 4) + fi = NULL; } rth = rt_dst_alloc(IN_DEV_CONF_GET(in_dev, NOPOLICY), @@ -2399,7 +2401,7 @@ static struct rtable *__mkroute_output(struct fib_result *res, RT_CACHE_STAT_INC(out_slow_mc); } #ifdef CONFIG_IP_MROUTE - if (res->type == RTN_MULTICAST) { + if (type == RTN_MULTICAST) { if (IN_DEV_MFORWARD(in_dev) && !ipv4_is_local_multicast(oldflp->fl4_dst)) { rth->dst.input = ip_mr_input; @@ -2409,7 +2411,7 @@ static struct rtable *__mkroute_output(struct fib_result *res, #endif } - rt_set_nexthop(rth, res, 0); + rt_set_nexthop(rth, res, fi, type, 0); rth->rt_flags = flags; return rth; -- cgit v1.1 From fd23c3b31107e2fc483301ee923d8a1db14e53f4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 18 Feb 2011 12:42:28 -0800 Subject: ipv4: Add hash table of interface addresses. This will be used to optimize __ip_dev_find() and friends. With help from Eric Dumazet. Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 748cb5b..2fe5076 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -51,6 +51,7 @@ #include #include #include +#include #ifdef CONFIG_SYSCTL #include #endif @@ -92,6 +93,38 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, }; +/* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE + * value. So if you change this define, make appropriate changes to + * inet_addr_hash as well. 
+ */ +#define IN4_ADDR_HSIZE 256 +static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE]; +static DEFINE_SPINLOCK(inet_addr_hash_lock); + +static inline unsigned int inet_addr_hash(struct net *net, __be32 addr) +{ + u32 val = (__force u32) addr ^ hash_ptr(net, 8); + + return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) & + (IN4_ADDR_HSIZE - 1)); +} + +static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa) +{ + unsigned int hash = inet_addr_hash(net, ifa->ifa_address); + + spin_lock(&inet_addr_hash_lock); + hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]); + spin_unlock(&inet_addr_hash_lock); +} + +static void inet_hash_remove(struct in_ifaddr *ifa) +{ + spin_lock(&inet_addr_hash_lock); + hlist_del_init_rcu(&ifa->hash); + spin_unlock(&inet_addr_hash_lock); +} + static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); @@ -265,6 +298,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, } if (!do_promote) { + inet_hash_remove(ifa); *ifap1 = ifa->ifa_next; rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid); @@ -281,6 +315,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, /* 2. Unlink it */ *ifap = ifa1->ifa_next; + inet_hash_remove(ifa1); /* 3. Announce address deletion */ @@ -368,6 +403,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, ifa->ifa_next = *ifap; *ifap = ifa; + inet_hash_insert(dev_net(in_dev->dev), ifa); + /* Send message first, then call notifier. 
Notifier will trigger FIB update, so that listeners of netlink will know about new ifaddr */ @@ -521,6 +558,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) if (tb[IFA_ADDRESS] == NULL) tb[IFA_ADDRESS] = tb[IFA_LOCAL]; + INIT_HLIST_NODE(&ifa->hash); ifa->ifa_prefixlen = ifm->ifa_prefixlen; ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); ifa->ifa_flags = ifm->ifa_flags; @@ -728,6 +766,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (!ifa) { ret = -ENOBUFS; ifa = inet_alloc_ifa(); + INIT_HLIST_NODE(&ifa->hash); if (!ifa) break; if (colon) @@ -1069,6 +1108,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, struct in_ifaddr *ifa = inet_alloc_ifa(); if (ifa) { + INIT_HLIST_NODE(&ifa->hash); ifa->ifa_local = ifa->ifa_address = htonl(INADDR_LOOPBACK); ifa->ifa_prefixlen = 8; @@ -1710,6 +1750,11 @@ static struct rtnl_af_ops inet_af_ops = { void __init devinet_init(void) { + int i; + + for (i = 0; i < IN4_ADDR_HSIZE; i++) + INIT_HLIST_HEAD(&inet_addr_lst[i]); + register_pernet_subsys(&devinet_ops); register_gifconf(PF_INET, inet_gifconf); -- cgit v1.1 From 9435eb1cf0b76b323019cebf8d16762a50a12a19 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 18 Feb 2011 12:43:09 -0800 Subject: ipv4: Implement __ip_dev_find using new interface address hash. Much quicker than going through the FIB tables. Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 33 +++++++++++++++++++++++++++++++++ net/ipv4/fib_frontend.c | 40 ---------------------------------------- 2 files changed, 33 insertions(+), 40 deletions(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 2fe5076..ee144a4 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -125,6 +125,39 @@ static void inet_hash_remove(struct in_ifaddr *ifa) spin_unlock(&inet_addr_hash_lock); } +/** + * __ip_dev_find - find the first device with a given source address. 
+ * @net: the net namespace + * @addr: the source address + * @devref: if true, take a reference on the found device + * + * If a caller uses devref=false, it should be protected by RCU, or RTNL + */ +struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) +{ + unsigned int hash = inet_addr_hash(net, addr); + struct net_device *result = NULL; + struct in_ifaddr *ifa; + struct hlist_node *node; + + rcu_read_lock(); + hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) { + struct net_device *dev = ifa->ifa_dev->dev; + + if (!net_eq(dev_net(dev), net)) + continue; + if (ifa->ifa_address == addr) { + result = dev; + break; + } + } + if (result && devref) + dev_hold(result); + rcu_read_unlock(); + return result; +} +EXPORT_SYMBOL(__ip_dev_find); + static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 2a49c06..ad0778a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -132,46 +132,6 @@ static void fib_flush(struct net *net) rt_cache_flush(net, -1); } -/** - * __ip_dev_find - find the first device with a given source address. 
- * @net: the net namespace - * @addr: the source address - * @devref: if true, take a reference on the found device - * - * If a caller uses devref=false, it should be protected by RCU, or RTNL - */ -struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) -{ - struct flowi fl = { - .fl4_dst = addr, - }; - struct fib_result res = { 0 }; - struct net_device *dev = NULL; - struct fib_table *local_table; - -#ifdef CONFIG_IP_MULTIPLE_TABLES - res.r = NULL; -#endif - - rcu_read_lock(); - local_table = fib_get_table(net, RT_TABLE_LOCAL); - if (!local_table || - fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) { - rcu_read_unlock(); - return NULL; - } - if (res.type != RTN_LOCAL) - goto out; - dev = FIB_RES_DEV(res); - - if (dev && devref) - dev_hold(dev); -out: - rcu_read_unlock(); - return dev; -} -EXPORT_SYMBOL(__ip_dev_find); - /* * Find address type as if only "dev" was present in the system. If * on_dev is NULL then all interfaces are taken into consideration. -- cgit v1.1 From 83bdf2a17279bd6ee3d0f5c0f086ebe06644109d Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Tue, 15 Feb 2011 13:11:22 -0800 Subject: mac80211: Add power to debugfs. Signed-off-by: Ben Greear Signed-off-by: John W. 
Linville --- net/mac80211/debugfs.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 1f02e59..51f0d78 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -60,6 +60,10 @@ static const struct file_operations name## _ops = { \ debugfs_create_file(#name, mode, phyd, local, &name## _ops); +DEBUGFS_READONLY_FILE(user_power, "%d", + local->user_power_level); +DEBUGFS_READONLY_FILE(power, "%d", + local->hw.conf.power_level); DEBUGFS_READONLY_FILE(frequency, "%d", local->hw.conf.channel->center_freq); DEBUGFS_READONLY_FILE(total_ps_buffered, "%d", @@ -391,6 +395,8 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_ADD(uapsd_queues); DEBUGFS_ADD(uapsd_max_sp_len); DEBUGFS_ADD(channel_type); + DEBUGFS_ADD(user_power); + DEBUGFS_ADD(power); statsd = debugfs_create_dir("statistics", phyd); -- cgit v1.1 From 8ba0537c620ad9f37b0e810ce0a9ff367a021f5e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 16 Feb 2011 08:46:58 +0100 Subject: mac80211: fix 2.4 GHz 40 MHz disabling The module parameter ieee80211_disable_40mhz_24ghz was meant to allow disabling 40 MHz operation in the 2.4 GHz band by default. However, it is buggy as implemented because while it advertises to the AP that the device doesn't support 40 MHz, it will itself still use 40 MHz configurations. To fix this, clear the 40 MHz bits from the sband completely instead of overriding where used. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/ieee80211_i.h | 2 -- net/mac80211/main.c | 14 +++++++++++++- net/mac80211/util.c | 6 ------ net/mac80211/work.c | 6 ------ 4 files changed, 13 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 44eea1a..bb63878 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1066,8 +1066,6 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, void ieee80211_configure_filter(struct ieee80211_local *local); u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata); -extern bool ieee80211_disable_40mhz_24ghz; - /* STA code */ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata); int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/main.c b/net/mac80211/main.c index e7eb2cf..2543e48 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -34,7 +34,7 @@ #include "debugfs.h" -bool ieee80211_disable_40mhz_24ghz; +static bool ieee80211_disable_40mhz_24ghz; module_param(ieee80211_disable_40mhz_24ghz, bool, 0644); MODULE_PARM_DESC(ieee80211_disable_40mhz_24ghz, "Disable 40MHz support in the 2.4GHz band"); @@ -723,6 +723,18 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) } channels += sband->n_channels; + /* + * Since ieee80211_disable_40mhz_24ghz is global, we can + * modify the sband's ht data even if the driver uses a + * global structure for that. 
+ */ + if (ieee80211_disable_40mhz_24ghz && + band == IEEE80211_BAND_2GHZ && + sband->ht_cap.ht_supported) { + sband->ht_cap.cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; + sband->ht_cap.cap &= ~IEEE80211_HT_CAP_SGI_40; + } + if (max_bitrates < sband->n_bitrates) max_bitrates = sband->n_bitrates; supp_ht = supp_ht || sband->ht_cap.ht_supported; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index cf68700..26fd5d2 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -986,12 +986,6 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, u16 cap = sband->ht_cap.cap; __le16 tmp; - if (ieee80211_disable_40mhz_24ghz && - sband->band == IEEE80211_BAND_2GHZ) { - cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; - cap &= ~IEEE80211_HT_CAP_SGI_40; - } - *pos++ = WLAN_EID_HT_CAPABILITY; *pos++ = sizeof(struct ieee80211_ht_cap); memset(pos, 0, sizeof(struct ieee80211_ht_cap)); diff --git a/net/mac80211/work.c b/net/mac80211/work.c index 64f2b28..204f0a4 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -126,12 +126,6 @@ static void ieee80211_add_ht_ie(struct sk_buff *skb, const u8 *ht_info_ie, /* determine capability flags */ - if (ieee80211_disable_40mhz_24ghz && - sband->band == IEEE80211_BAND_2GHZ) { - cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; - cap &= ~IEEE80211_HT_CAP_SGI_40; - } - switch (ht_info->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: if (flags & IEEE80211_CHAN_NO_HT40PLUS) { -- cgit v1.1 From 540005c7fc787c211967148f7229f43db1eead38 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 17 Feb 2011 13:36:19 +0000 Subject: small adjustment to net/mac80211/Kconfig "def_bool n" without prompt is pointless, this should be just "bool". Signed-off-by: Jan Beulich Signed-off-by: John W. 
Linville --- net/mac80211/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 9109262..4c57a9c 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -17,7 +17,7 @@ comment "CFG80211 needs to be enabled for MAC80211" if MAC80211 != n config MAC80211_HAS_RC - def_bool n + bool config MAC80211_RC_PID bool "PID controller based rate control algorithm" if EMBEDDED -- cgit v1.1 From db28569adc692d9fb8a2d2d8e7ebab7fd5481f10 Mon Sep 17 00:00:00 2001 From: Vivek Natarajan Date: Fri, 18 Feb 2011 17:18:03 +0530 Subject: mac80211: Clear PS related flag on disabling power save. Clear IEEE80211_STA_NULLFUNC_ACKED flag on disabling power save. Without this fix, there is a chance of setting CONF_PS before sending nullfunc frame. Signed-off-by: Vivek Natarajan Signed-off-by: John W. Linville --- net/mac80211/tx.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 38e5939..d0f91d7 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -236,6 +236,7 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx) if (local->hw.conf.flags & IEEE80211_CONF_PS) { ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_QUEUE_STOP_REASON_PS); + ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; ieee80211_queue_work(&local->hw, &local->dynamic_ps_disable_work); } -- cgit v1.1 From 089c34827e52346f0303d1e6a7b744c1f4da3095 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Sat, 19 Feb 2011 21:55:45 +0000 Subject: tcp: Remove debug macro of TCP_CHECK_TIMER Now, TCP_CHECK_TIMER is not used for debugging, it does nothing. And, it has been there for several years, maybe 6 years. Remove it to keep code clearer. Signed-off-by: Shan Wei Signed-off-by: David S. 
Miller --- net/ipv4/tcp.c | 9 --------- net/ipv4/tcp_ipv4.c | 5 ----- net/ipv4/tcp_timer.c | 3 --- net/ipv6/tcp_ipv6.c | 4 ---- 4 files changed, 21 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f9867d2..a17a5a7 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -873,9 +873,7 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, flags); lock_sock(sk); - TCP_CHECK_TIMER(sk); res = do_tcp_sendpages(sk, &page, offset, size, flags); - TCP_CHECK_TIMER(sk); release_sock(sk); return res; } @@ -916,7 +914,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, long timeo; lock_sock(sk); - TCP_CHECK_TIMER(sk); flags = msg->msg_flags; timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); @@ -1104,7 +1101,6 @@ wait_for_memory: out: if (copied) tcp_push(sk, flags, mss_now, tp->nonagle); - TCP_CHECK_TIMER(sk); release_sock(sk); return copied; @@ -1123,7 +1119,6 @@ do_error: goto out; out_err: err = sk_stream_error(sk, flags, err); - TCP_CHECK_TIMER(sk); release_sock(sk); return err; } @@ -1415,8 +1410,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, lock_sock(sk); - TCP_CHECK_TIMER(sk); - err = -ENOTCONN; if (sk->sk_state == TCP_LISTEN) goto out; @@ -1767,12 +1760,10 @@ skip_copy: /* Clean up data we have read: This will do ACK frames. 
*/ tcp_cleanup_rbuf(sk, copied); - TCP_CHECK_TIMER(sk); release_sock(sk); return copied; out: - TCP_CHECK_TIMER(sk); release_sock(sk); return err; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e2b9be2..ef5a90b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1556,12 +1556,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ sock_rps_save_rxhash(sk, skb->rxhash); - TCP_CHECK_TIMER(sk); if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; goto reset; } - TCP_CHECK_TIMER(sk); return 0; } @@ -1583,13 +1581,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) } else sock_rps_save_rxhash(sk, skb->rxhash); - - TCP_CHECK_TIMER(sk); if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; goto reset; } - TCP_CHECK_TIMER(sk); return 0; reset: diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 74a6aa0..ecd44b0 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -259,7 +259,6 @@ static void tcp_delack_timer(unsigned long data) tcp_send_ack(sk); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS); } - TCP_CHECK_TIMER(sk); out: if (tcp_memory_pressure) @@ -481,7 +480,6 @@ static void tcp_write_timer(unsigned long data) tcp_probe_timer(sk); break; } - TCP_CHECK_TIMER(sk); out: sk_mem_reclaim(sk); @@ -589,7 +587,6 @@ static void tcp_keepalive_timer (unsigned long data) elapsed = keepalive_time_when(tp) - elapsed; } - TCP_CHECK_TIMER(sk); sk_mem_reclaim(sk); resched: diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d6954e3..1d0ab55 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1636,10 +1636,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) opt_skb = skb_clone(skb, GFP_ATOMIC); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ - TCP_CHECK_TIMER(sk); if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) goto reset; - TCP_CHECK_TIMER(sk); if (opt_skb) goto ipv6_pktoptions; 
return 0; @@ -1667,10 +1665,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) } } - TCP_CHECK_TIMER(sk); if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) goto reset; - TCP_CHECK_TIMER(sk); if (opt_skb) goto ipv6_pktoptions; return 0; -- cgit v1.1 From 59ed5aba9ca1c799e272b352d5d2d7fe12bd32e8 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Sat, 19 Feb 2011 21:57:26 +0000 Subject: sctp: fix compile warnings in sctp_tsnmap_num_gabs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/sctp/tsnmap.c: In function ‘sctp_tsnmap_num_gabs’: net/sctp/tsnmap.c:347: warning: ‘start’ may be used uninitialized in this function net/sctp/tsnmap.c:347: warning: ‘end’ may be used uninitialized in this function Signed-off-by: Shan Wei Signed-off-by: David S. Miller --- net/sctp/tsnmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index 747d541..f1e40ceb 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -344,7 +344,7 @@ __u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map, /* Refresh the gap ack information. */ if (sctp_tsnmap_has_gap(map)) { - __u16 start, end; + __u16 start = 0, end = 0; sctp_tsnmap_iter_init(map, &iter); while (sctp_tsnmap_next_gap_ack(map, &iter, &start, -- cgit v1.1 From 366a033698266c304abd6365ea3bcaec36860328 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sat, 19 Feb 2011 12:05:55 -0300 Subject: Bluetooth: Make pending_add return a pointer to the added entry This makes it more convenient to do manipulations on the entry (needed by later commits). Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/mgmt.c | 62 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f5ef7a3..52e5f88 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -219,14 +219,14 @@ static void mgmt_pending_free(struct pending_cmd *cmd) kfree(cmd); } -static int mgmt_pending_add(struct sock *sk, u16 opcode, int index, - void *data, u16 len) +static struct pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, + u16 index, void *data, u16 len) { struct pending_cmd *cmd; cmd = kmalloc(sizeof(*cmd), GFP_ATOMIC); if (!cmd) - return -ENOMEM; + return NULL; cmd->opcode = opcode; cmd->index = index; @@ -234,7 +234,7 @@ static int mgmt_pending_add(struct sock *sk, u16 opcode, int index, cmd->cmd = kmalloc(len, GFP_ATOMIC); if (!cmd->cmd) { kfree(cmd); - return -ENOMEM; + return NULL; } memcpy(cmd->cmd, data, len); @@ -244,7 +244,7 @@ static int mgmt_pending_add(struct sock *sk, u16 opcode, int index, list_add(&cmd->list, &cmd_list); - return 0; + return cmd; } static void mgmt_pending_foreach(u16 opcode, int index, @@ -305,8 +305,9 @@ static int set_powered(struct sock *sk, unsigned char *data, u16 len) { struct mgmt_mode *cp; struct hci_dev *hdev; + struct pending_cmd *cmd; u16 dev_id; - int ret, up; + int err, up; cp = (void *) data; dev_id = get_unaligned_le16(&cp->index); @@ -321,36 +322,39 @@ static int set_powered(struct sock *sk, unsigned char *data, u16 len) up = test_bit(HCI_UP, &hdev->flags); if ((cp->val && up) || (!cp->val && !up)) { - ret = cmd_status(sk, MGMT_OP_SET_POWERED, EALREADY); + err = cmd_status(sk, MGMT_OP_SET_POWERED, EALREADY); goto failed; } if (mgmt_pending_find(MGMT_OP_SET_POWERED, dev_id)) { - ret = cmd_status(sk, MGMT_OP_SET_POWERED, EBUSY); + err = cmd_status(sk, MGMT_OP_SET_POWERED, EBUSY); goto failed; } - ret = mgmt_pending_add(sk, MGMT_OP_SET_POWERED, dev_id, data, len); - if (ret < 0) + cmd = 
mgmt_pending_add(sk, MGMT_OP_SET_POWERED, dev_id, data, len); + if (!cmd) { + err = -ENOMEM; goto failed; + } if (cp->val) queue_work(hdev->workqueue, &hdev->power_on); else queue_work(hdev->workqueue, &hdev->power_off); - ret = 0; + err = 0; failed: hci_dev_unlock_bh(hdev); hci_dev_put(hdev); - return ret; + return err; } static int set_discoverable(struct sock *sk, unsigned char *data, u16 len) { struct mgmt_mode *cp; struct hci_dev *hdev; + struct pending_cmd *cmd; u16 dev_id; u8 scan; int err; @@ -383,9 +387,11 @@ static int set_discoverable(struct sock *sk, unsigned char *data, u16 len) goto failed; } - err = mgmt_pending_add(sk, MGMT_OP_SET_DISCOVERABLE, dev_id, data, len); - if (err < 0) + cmd = mgmt_pending_add(sk, MGMT_OP_SET_DISCOVERABLE, dev_id, data, len); + if (!cmd) { + err = -ENOMEM; goto failed; + } scan = SCAN_PAGE; @@ -407,6 +413,7 @@ static int set_connectable(struct sock *sk, unsigned char *data, u16 len) { struct mgmt_mode *cp; struct hci_dev *hdev; + struct pending_cmd *cmd; u16 dev_id; u8 scan; int err; @@ -438,9 +445,11 @@ static int set_connectable(struct sock *sk, unsigned char *data, u16 len) goto failed; } - err = mgmt_pending_add(sk, MGMT_OP_SET_CONNECTABLE, dev_id, data, len); - if (err < 0) + cmd = mgmt_pending_add(sk, MGMT_OP_SET_CONNECTABLE, dev_id, data, len); + if (!cmd) { + err = -ENOMEM; goto failed; + } if (cp->val) scan = SCAN_PAGE; @@ -828,6 +837,7 @@ static int disconnect(struct sock *sk, unsigned char *data, u16 len) struct hci_dev *hdev; struct mgmt_cp_disconnect *cp; struct hci_cp_disconnect dc; + struct pending_cmd *cmd; struct hci_conn *conn; u16 dev_id; int err; @@ -859,9 +869,11 @@ static int disconnect(struct sock *sk, unsigned char *data, u16 len) goto failed; } - err = mgmt_pending_add(sk, MGMT_OP_DISCONNECT, dev_id, data, len); - if (err < 0) + cmd = mgmt_pending_add(sk, MGMT_OP_DISCONNECT, dev_id, data, len); + if (!cmd) { + err = -ENOMEM; goto failed; + } put_unaligned_le16(conn->handle, &dc.handle); dc.reason = 
0x13; /* Remote User Terminated Connection */ @@ -938,6 +950,7 @@ static int pin_code_reply(struct sock *sk, unsigned char *data, u16 len) struct hci_dev *hdev; struct mgmt_cp_pin_code_reply *cp; struct hci_cp_pin_code_reply reply; + struct pending_cmd *cmd; u16 dev_id; int err; @@ -957,9 +970,11 @@ static int pin_code_reply(struct sock *sk, unsigned char *data, u16 len) goto failed; } - err = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_REPLY, dev_id, data, len); - if (err < 0) + cmd = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_REPLY, dev_id, data, len); + if (!cmd) { + err = -ENOMEM; goto failed; + } bacpy(&reply.bdaddr, &cp->bdaddr); reply.pin_len = cp->pin_len; @@ -980,6 +995,7 @@ static int pin_code_neg_reply(struct sock *sk, unsigned char *data, u16 len) { struct hci_dev *hdev; struct mgmt_cp_pin_code_neg_reply *cp; + struct pending_cmd *cmd; u16 dev_id; int err; @@ -999,10 +1015,12 @@ static int pin_code_neg_reply(struct sock *sk, unsigned char *data, u16 len) goto failed; } - err = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_NEG_REPLY, dev_id, + cmd = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_NEG_REPLY, dev_id, data, len); - if (err < 0) + if (!cmd) { + err = -ENOMEM; goto failed; + } err = hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY, sizeof(bdaddr_t), &cp->bdaddr); -- cgit v1.1 From e9a416b5ce0c0f93819f55d34cf6882196e9c3b2 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sat, 19 Feb 2011 12:05:56 -0300 Subject: Bluetooth: Add mgmt_pair_device command This patch adds a new mgmt_pair_device which can be used to initiate a dedicated bonding procedure. Some extra callbacks are added to the hci_conn struct so that the pairing code can get notified of the completion of the procedure. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/mgmt.c | 133 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 52e5f88..d7fc54d 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -38,6 +38,7 @@ struct pending_cmd { int index; void *cmd; struct sock *sk; + void *user_data; }; LIST_HEAD(cmd_list); @@ -1063,6 +1064,135 @@ static int set_io_capability(struct sock *sk, unsigned char *data, u16 len) &dev_id, sizeof(dev_id)); } +static inline struct pending_cmd *find_pairing(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + struct list_head *p; + + list_for_each(p, &cmd_list) { + struct pending_cmd *cmd; + + cmd = list_entry(p, struct pending_cmd, list); + + if (cmd->opcode != MGMT_OP_PAIR_DEVICE) + continue; + + if (cmd->index != hdev->id) + continue; + + if (cmd->user_data != conn) + continue; + + return cmd; + } + + return NULL; +} + +static void pairing_complete(struct pending_cmd *cmd, u8 status) +{ + struct mgmt_rp_pair_device rp; + struct hci_conn *conn = cmd->user_data; + + rp.index = cmd->index; + bacpy(&rp.bdaddr, &conn->dst); + rp.status = status; + + cmd_complete(cmd->sk, MGMT_OP_PAIR_DEVICE, &rp, sizeof(rp)); + + /* So we don't get further callbacks for this connection */ + conn->connect_cfm_cb = NULL; + conn->security_cfm_cb = NULL; + conn->disconn_cfm_cb = NULL; + + hci_conn_put(conn); + + list_del(&cmd->list); + mgmt_pending_free(cmd); +} + +static void pairing_complete_cb(struct hci_conn *conn, u8 status) +{ + struct pending_cmd *cmd; + + BT_DBG("status %u", status); + + cmd = find_pairing(conn); + if (!cmd) { + BT_DBG("Unable to find a pending command"); + return; + } + + pairing_complete(cmd, status); +} + +static int pair_device(struct sock *sk, unsigned char *data, u16 len) +{ + struct hci_dev *hdev; + struct mgmt_cp_pair_device *cp; + struct pending_cmd *cmd; + u8 sec_level, auth_type; + struct hci_conn *conn; + u16 dev_id; + 
int err; + + BT_DBG(""); + + cp = (void *) data; + dev_id = get_unaligned_le16(&cp->index); + + hdev = hci_dev_get(dev_id); + if (!hdev) + return cmd_status(sk, MGMT_OP_PAIR_DEVICE, ENODEV); + + hci_dev_lock_bh(hdev); + + if (cp->io_cap == 0x03) { + sec_level = BT_SECURITY_MEDIUM; + auth_type = HCI_AT_DEDICATED_BONDING; + } else { + sec_level = BT_SECURITY_HIGH; + auth_type = HCI_AT_DEDICATED_BONDING_MITM; + } + + conn = hci_connect(hdev, ACL_LINK, &cp->bdaddr, sec_level, auth_type); + if (!conn) { + err = -ENOMEM; + goto unlock; + } + + if (conn->connect_cfm_cb) { + hci_conn_put(conn); + err = cmd_status(sk, MGMT_OP_PAIR_DEVICE, EBUSY); + goto unlock; + } + + cmd = mgmt_pending_add(sk, MGMT_OP_PAIR_DEVICE, dev_id, data, len); + if (!cmd) { + err = -ENOMEM; + hci_conn_put(conn); + goto unlock; + } + + conn->connect_cfm_cb = pairing_complete_cb; + conn->security_cfm_cb = pairing_complete_cb; + conn->disconn_cfm_cb = pairing_complete_cb; + conn->io_capability = cp->io_cap; + cmd->user_data = conn; + + if (conn->state == BT_CONNECTED && + hci_conn_security(conn, sec_level, auth_type)) + pairing_complete(cmd, 0); + + err = 0; + +unlock: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; +} + int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) { unsigned char *buf; @@ -1148,6 +1278,9 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) case MGMT_OP_SET_IO_CAPABILITY: err = set_io_capability(sk, buf + sizeof(*hdr), len); break; + case MGMT_OP_PAIR_DEVICE: + err = pair_device(sk, buf + sizeof(*hdr), len); + break; default: BT_DBG("Unknown op %u", opcode); err = cmd_status(sk, opcode, 0x01); -- cgit v1.1 From a5c296832b4fde7d32c01cff9cdd27d9c7c1c4f5 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sat, 19 Feb 2011 12:05:57 -0300 Subject: Bluetooth: Add management support for user confirmation request This patch adds support for the user confirmation (numeric comparison) Secure Simple Pairing authentication method. 
Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_event.c | 50 ++++++++++++++++++++++ net/bluetooth/mgmt.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 153 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 98b5764..604c7b5 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -796,6 +796,29 @@ static void hci_cc_le_read_buffer_size(struct hci_dev *hdev, hci_req_complete(hdev, HCI_OP_LE_READ_BUFFER_SIZE, rp->status); } +static void hci_cc_user_confirm_reply(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_rp_user_confirm_reply *rp = (void *) skb->data; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (test_bit(HCI_MGMT, &hdev->flags)) + mgmt_user_confirm_reply_complete(hdev->id, &rp->bdaddr, + rp->status); +} + +static void hci_cc_user_confirm_neg_reply(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_user_confirm_reply *rp = (void *) skb->data; + + BT_DBG("%s status 0x%x", hdev->name, rp->status); + + if (test_bit(HCI_MGMT, &hdev->flags)) + mgmt_user_confirm_neg_reply_complete(hdev->id, &rp->bdaddr, + rp->status); +} + static inline void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) { BT_DBG("%s status 0x%x", hdev->name, status); @@ -1728,6 +1751,14 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk hci_cc_le_read_buffer_size(hdev, skb); break; + case HCI_OP_USER_CONFIRM_REPLY: + hci_cc_user_confirm_reply(hdev, skb); + break; + + case HCI_OP_USER_CONFIRM_NEG_REPLY: + hci_cc_user_confirm_neg_reply(hdev, skb); + break; + default: BT_DBG("%s opcode 0x%x", hdev->name, opcode); break; @@ -2362,6 +2393,21 @@ unlock: hci_dev_unlock(hdev); } +static inline void hci_user_confirm_request_evt(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_ev_user_confirm_req *ev = (void *) skb->data; + + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + if 
(test_bit(HCI_MGMT, &hdev->flags)) + mgmt_user_confirm_request(hdev->id, &ev->bdaddr, ev->passkey); + + hci_dev_unlock(hdev); +} + static inline void hci_simple_pair_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_simple_pair_complete *ev = (void *) skb->data; @@ -2580,6 +2626,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_io_capa_reply_evt(hdev, skb); break; + case HCI_EV_USER_CONFIRM_REQUEST: + hci_user_confirm_request_evt(hdev, skb); + break; + case HCI_EV_SIMPLE_PAIR_COMPLETE: hci_simple_pair_complete_evt(hdev, skb); break; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index d7fc54d..fdcc974 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1193,6 +1193,55 @@ unlock: return err; } +static int user_confirm_reply(struct sock *sk, unsigned char *data, u16 len, + int success) +{ + struct mgmt_cp_user_confirm_reply *cp = (void *) data; + u16 dev_id, mgmt_op, hci_op; + struct pending_cmd *cmd; + struct hci_dev *hdev; + int err; + + BT_DBG(""); + + dev_id = get_unaligned_le16(&cp->index); + + if (success) { + mgmt_op = MGMT_OP_USER_CONFIRM_REPLY; + hci_op = HCI_OP_USER_CONFIRM_REPLY; + } else { + mgmt_op = MGMT_OP_USER_CONFIRM_NEG_REPLY; + hci_op = HCI_OP_USER_CONFIRM_NEG_REPLY; + } + + hdev = hci_dev_get(dev_id); + if (!hdev) + return cmd_status(sk, mgmt_op, ENODEV); + + if (!test_bit(HCI_UP, &hdev->flags)) { + err = cmd_status(sk, mgmt_op, ENETDOWN); + goto failed; + } + + cmd = mgmt_pending_add(sk, mgmt_op, dev_id, data, len); + if (!cmd) { + err = -ENOMEM; + goto failed; + } + + err = hci_send_cmd(hdev, hci_op, sizeof(cp->bdaddr), &cp->bdaddr); + if (err < 0) { + list_del(&cmd->list); + mgmt_pending_free(cmd); + } + +failed: + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); + + return err; +} + int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) { unsigned char *buf; @@ -1281,6 +1330,12 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) case 
MGMT_OP_PAIR_DEVICE: err = pair_device(sk, buf + sizeof(*hdr), len); break; + case MGMT_OP_USER_CONFIRM_REPLY: + err = user_confirm_reply(sk, buf + sizeof(*hdr), len, 1); + break; + case MGMT_OP_USER_CONFIRM_NEG_REPLY: + err = user_confirm_reply(sk, buf + sizeof(*hdr), len, 0); + break; default: BT_DBG("Unknown op %u", opcode); err = cmd_status(sk, opcode, 0x01); @@ -1541,3 +1596,51 @@ int mgmt_pin_code_neg_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) return err; } + +int mgmt_user_confirm_request(u16 index, bdaddr_t *bdaddr, __le32 value) +{ + struct mgmt_ev_user_confirm_request ev; + + BT_DBG("hci%u", index); + + put_unaligned_le16(index, &ev.index); + bacpy(&ev.bdaddr, bdaddr); + put_unaligned_le32(value, &ev.value); + + return mgmt_event(MGMT_EV_USER_CONFIRM_REQUEST, &ev, sizeof(ev), NULL); +} + +static int confirm_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status, + u8 opcode) +{ + struct pending_cmd *cmd; + struct mgmt_rp_user_confirm_reply rp; + int err; + + cmd = mgmt_pending_find(opcode, index); + if (!cmd) + return -ENOENT; + + put_unaligned_le16(index, &rp.index); + bacpy(&rp.bdaddr, bdaddr); + rp.status = status; + err = cmd_complete(cmd->sk, opcode, &rp, sizeof(rp)); + + list_del(&cmd->list); + mgmt_pending_free(cmd); + + return err; +} + +int mgmt_user_confirm_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) +{ + return confirm_reply_complete(index, bdaddr, status, + MGMT_OP_USER_CONFIRM_REPLY); +} + +int mgmt_user_confirm_neg_reply_complete(u16 index, bdaddr_t *bdaddr, + u8 status) +{ + return confirm_reply_complete(index, bdaddr, status, + MGMT_OP_USER_CONFIRM_NEG_REPLY); +} -- cgit v1.1 From 59a24b5d0d4befc2498f51c57905cb02963ff275 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sat, 19 Feb 2011 12:05:58 -0300 Subject: Bluetooth: Fix mgmt_pin_code_reply command status opcode The opcode for the ENODEV case was wrong (probably copy-paste mistake). Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index fdcc974..d1d9b8c 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -962,7 +962,7 @@ static int pin_code_reply(struct sock *sk, unsigned char *data, u16 len) hdev = hci_dev_get(dev_id); if (!hdev) - return cmd_status(sk, MGMT_OP_DISCONNECT, ENODEV); + return cmd_status(sk, MGMT_OP_PIN_CODE_REPLY, ENODEV); hci_dev_lock_bh(hdev); -- cgit v1.1 From ac56fb13c0508181b4227b8ada6d47aaaf72794c Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sat, 19 Feb 2011 12:05:59 -0300 Subject: Bluetooth: Fix mgmt_pin_code_reply return parameters The command complete event for mgmt_pin_code_reply & mgmt_pin_code_neg_reply should have the adapter index, Bluetooth address as well as the status. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. Padovan --- net/bluetooth/mgmt.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index d1d9b8c..0d3d613 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1558,17 +1558,18 @@ int mgmt_pin_code_request(u16 index, bdaddr_t *bdaddr) int mgmt_pin_code_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) { struct pending_cmd *cmd; + struct mgmt_rp_pin_code_reply rp; int err; cmd = mgmt_pending_find(MGMT_OP_PIN_CODE_REPLY, index); if (!cmd) return -ENOENT; - if (status != 0) - err = cmd_status(cmd->sk, MGMT_OP_PIN_CODE_REPLY, status); - else - err = cmd_complete(cmd->sk, MGMT_OP_PIN_CODE_REPLY, - bdaddr, sizeof(*bdaddr)); + put_unaligned_le16(index, &rp.index); + bacpy(&rp.bdaddr, bdaddr); + rp.status = status; + + err = cmd_complete(cmd->sk, MGMT_OP_PIN_CODE_REPLY, &rp, sizeof(rp)); list_del(&cmd->list); mgmt_pending_free(cmd); @@ -1579,17 +1580,19 @@ int mgmt_pin_code_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) int 
mgmt_pin_code_neg_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) { struct pending_cmd *cmd; + struct mgmt_rp_pin_code_reply rp; int err; cmd = mgmt_pending_find(MGMT_OP_PIN_CODE_NEG_REPLY, index); if (!cmd) return -ENOENT; - if (status != 0) - err = cmd_status(cmd->sk, MGMT_OP_PIN_CODE_NEG_REPLY, status); - else - err = cmd_complete(cmd->sk, MGMT_OP_PIN_CODE_NEG_REPLY, - bdaddr, sizeof(*bdaddr)); + put_unaligned_le16(index, &rp.index); + bacpy(&rp.bdaddr, bdaddr); + rp.status = status; + + err = cmd_complete(cmd->sk, MGMT_OP_PIN_CODE_NEG_REPLY, + &rp, sizeof(rp)); list_del(&cmd->list); mgmt_pending_free(cmd); -- cgit v1.1 From 2a61169209c72317d4933f8d22f749a6a61a3d36 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sat, 19 Feb 2011 12:06:00 -0300 Subject: Bluetooth: Add mgmt_auth_failed event To properly track bonding completion an event to indicate authentication failure is needed. This event will be sent whenever an authentication complete HCI event with a non-zero status comes. It will also be sent when we're acting in acceptor role for SSP authentication in which case the controller will send a Simple Pairing Complete event. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/hci_event.c | 19 ++++++++++++++++--- net/bluetooth/mgmt.c | 11 +++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 604c7b5..3fbfa50 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1424,8 +1424,10 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s if (!ev->status) { conn->link_mode |= HCI_LM_AUTH; conn->sec_level = conn->pending_sec_level; - } else + } else { + mgmt_auth_failed(hdev->id, &conn->dst, ev->status); conn->sec_level = BT_SECURITY_LOW; + } clear_bit(HCI_CONN_AUTH_PEND, &conn->pend); @@ -2418,9 +2420,20 @@ static inline void hci_simple_pair_complete_evt(struct hci_dev *hdev, struct sk_ hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); - if (conn) - hci_conn_put(conn); + if (!conn) + goto unlock; + + /* To avoid duplicate auth_failed events to user space we check + * the HCI_CONN_AUTH_PEND flag which will be set if we + * initiated the authentication. 
A traditional auth_complete + * event gets always produced as initiator and is also mapped to + * the mgmt_auth_failed event */ + if (!test_bit(HCI_CONN_AUTH_PEND, &conn->pend) && ev->status != 0) + mgmt_auth_failed(hdev->id, &conn->dst, ev->status); + hci_conn_put(conn); + +unlock: hci_dev_unlock(hdev); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 0d3d613..46e2c39 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1647,3 +1647,14 @@ int mgmt_user_confirm_neg_reply_complete(u16 index, bdaddr_t *bdaddr, return confirm_reply_complete(index, bdaddr, status, MGMT_OP_USER_CONFIRM_NEG_REPLY); } + +int mgmt_auth_failed(u16 index, bdaddr_t *bdaddr, u8 status) +{ + struct mgmt_ev_auth_failed ev; + + put_unaligned_le16(index, &ev.index); + bacpy(&ev.bdaddr, bdaddr); + ev.status = status; + + return mgmt_event(MGMT_EV_AUTH_FAILED, &ev, sizeof(ev), NULL); +} -- cgit v1.1 From a958355699dd90ba69951bdf55dda00e3e97222c Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sat, 19 Feb 2011 12:06:01 -0300 Subject: Bluetooth: Fix initial value for remote authentication requirements The remote authentication requirements for connections need to be initialized to 0xff (unknown) since it is possible that we receive an IO Capability Request before we have received information about the remote requirements. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/hci_conn.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index a050a69..6d8b988 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -286,6 +286,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) conn->state = BT_OPEN; conn->auth_type = HCI_AT_GENERAL_BONDING; conn->io_capability = hdev->io_capability; + conn->remote_auth = 0xff; conn->power_save = 1; conn->disc_timeout = HCI_DISCONN_TIMEOUT; -- cgit v1.1 From a664b5bc77fbc80c163de5606114659d3cbeb043 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Sat, 19 Feb 2011 12:06:02 -0300 Subject: Bluetooth: Fix unnecessary list traversal in mgmt_pending_remove All of the places that need to call mgmt_pending_remove already have a pointer to the pending command, so searching for the command in the list doesn't make sense. The added benefit is that many places that previously had to call list_del + mgmt_pending_free can just call mgmt_pending_remove now. Signed-off-by: Johan Hedberg Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/mgmt.c | 42 ++++++++++++++---------------------------- 1 file changed, 14 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 46e2c39..982becd 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -290,14 +290,8 @@ static struct pending_cmd *mgmt_pending_find(u16 opcode, int index) return NULL; } -static void mgmt_pending_remove(u16 opcode, int index) +static void mgmt_pending_remove(struct pending_cmd *cmd) { - struct pending_cmd *cmd; - - cmd = mgmt_pending_find(opcode, index); - if (cmd == NULL) - return; - list_del(&cmd->list); mgmt_pending_free(cmd); } @@ -401,7 +395,7 @@ static int set_discoverable(struct sock *sk, unsigned char *data, u16 len) err = hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); if (err < 0) - mgmt_pending_remove(MGMT_OP_SET_DISCOVERABLE, dev_id); + mgmt_pending_remove(cmd); failed: hci_dev_unlock_bh(hdev); @@ -459,7 +453,7 @@ static int set_connectable(struct sock *sk, unsigned char *data, u16 len) err = hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); if (err < 0) - mgmt_pending_remove(MGMT_OP_SET_CONNECTABLE, dev_id); + mgmt_pending_remove(cmd); failed: hci_dev_unlock_bh(hdev); @@ -881,7 +875,7 @@ static int disconnect(struct sock *sk, unsigned char *data, u16 len) err = hci_send_cmd(hdev, HCI_OP_DISCONNECT, sizeof(dc), &dc); if (err < 0) - mgmt_pending_remove(MGMT_OP_DISCONNECT, dev_id); + mgmt_pending_remove(cmd); failed: hci_dev_unlock_bh(hdev); @@ -983,7 +977,7 @@ static int pin_code_reply(struct sock *sk, unsigned char *data, u16 len) err = hci_send_cmd(hdev, HCI_OP_PIN_CODE_REPLY, sizeof(reply), &reply); if (err < 0) - mgmt_pending_remove(MGMT_OP_PIN_CODE_REPLY, dev_id); + mgmt_pending_remove(cmd); failed: hci_dev_unlock_bh(hdev); @@ -1026,7 +1020,7 @@ static int pin_code_neg_reply(struct sock *sk, unsigned char *data, u16 len) err = hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY, sizeof(bdaddr_t), &cp->bdaddr); if (err < 
0) - mgmt_pending_remove(MGMT_OP_PIN_CODE_NEG_REPLY, dev_id); + mgmt_pending_remove(cmd); failed: hci_dev_unlock_bh(hdev); @@ -1107,8 +1101,7 @@ static void pairing_complete(struct pending_cmd *cmd, u8 status) hci_conn_put(conn); - list_del(&cmd->list); - mgmt_pending_free(cmd); + mgmt_pending_remove(cmd); } static void pairing_complete_cb(struct hci_conn *conn, u8 status) @@ -1230,10 +1223,8 @@ static int user_confirm_reply(struct sock *sk, unsigned char *data, u16 len, } err = hci_send_cmd(hdev, hci_op, sizeof(cp->bdaddr), &cp->bdaddr); - if (err < 0) { - list_del(&cmd->list); - mgmt_pending_free(cmd); - } + if (err < 0) + mgmt_pending_remove(cmd); failed: hci_dev_unlock_bh(hdev); @@ -1494,8 +1485,7 @@ static void disconnect_rsp(struct pending_cmd *cmd, void *data) *sk = cmd->sk; sock_hold(*sk); - list_del(&cmd->list); - mgmt_pending_free(cmd); + mgmt_pending_remove(cmd); } int mgmt_disconnected(u16 index, bdaddr_t *bdaddr) @@ -1528,8 +1518,7 @@ int mgmt_disconnect_failed(u16 index) err = cmd_status(cmd->sk, MGMT_OP_DISCONNECT, EIO); - list_del(&cmd->list); - mgmt_pending_free(cmd); + mgmt_pending_remove(cmd); return err; } @@ -1571,8 +1560,7 @@ int mgmt_pin_code_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) err = cmd_complete(cmd->sk, MGMT_OP_PIN_CODE_REPLY, &rp, sizeof(rp)); - list_del(&cmd->list); - mgmt_pending_free(cmd); + mgmt_pending_remove(cmd); return err; } @@ -1594,8 +1582,7 @@ int mgmt_pin_code_neg_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) err = cmd_complete(cmd->sk, MGMT_OP_PIN_CODE_NEG_REPLY, &rp, sizeof(rp)); - list_del(&cmd->list); - mgmt_pending_free(cmd); + mgmt_pending_remove(cmd); return err; } @@ -1629,8 +1616,7 @@ static int confirm_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status, rp.status = status; err = cmd_complete(cmd->sk, opcode, &rp, sizeof(rp)); - list_del(&cmd->list); - mgmt_pending_free(cmd); + mgmt_pending_remove(cmd); return err; } -- cgit v1.1 From 15c4794fe247d85ce38eb5f5e2a5855d996f56cd Mon Sep 17 
00:00:00 2001 From: Anderson Briglia Date: Mon, 21 Feb 2011 15:09:23 -0300 Subject: Bluetooth: Fix LE conn creation This patch prevents a crash when remote host tries to create a LE link which already exists. i.e.: call l2test twice passing the same parameters. Signed-off-by: Anderson Briglia Signed-off-by: Ville Tervo Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_conn.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 6d8b988..4504cb6 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -430,8 +430,9 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 if (type == LE_LINK) { le = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst); - if (!le) - le = hci_conn_add(hdev, LE_LINK, dst); + if (le) + return NULL; + le = hci_conn_add(hdev, LE_LINK, dst); if (!le) return NULL; if (le->state == BT_OPEN) -- cgit v1.1 From 731109e78415b4cc6c2f8de6c11b37f0e40741f8 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sat, 19 Feb 2011 18:05:08 +0800 Subject: ipvs: use hlist instead of list Signed-off-by: Changli Gao Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_conn.c | 52 +++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 83233fe..9c2a517 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -59,7 +59,7 @@ static int ip_vs_conn_tab_mask __read_mostly; /* * Connection hash table: for input and output packets lookups of IPVS */ -static struct list_head *ip_vs_conn_tab __read_mostly; +static struct hlist_head *ip_vs_conn_tab __read_mostly; /* SLAB cache for IPVS connections */ static struct kmem_cache *ip_vs_conn_cachep __read_mostly; @@ -201,7 +201,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) spin_lock(&cp->lock); if (!(cp->flags & 
IP_VS_CONN_F_HASHED)) { - list_add(&cp->c_list, &ip_vs_conn_tab[hash]); + hlist_add_head(&cp->c_list, &ip_vs_conn_tab[hash]); cp->flags |= IP_VS_CONN_F_HASHED; atomic_inc(&cp->refcnt); ret = 1; @@ -234,7 +234,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) spin_lock(&cp->lock); if (cp->flags & IP_VS_CONN_F_HASHED) { - list_del(&cp->c_list); + hlist_del(&cp->c_list); cp->flags &= ~IP_VS_CONN_F_HASHED; atomic_dec(&cp->refcnt); ret = 1; @@ -259,12 +259,13 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) { unsigned hash; struct ip_vs_conn *cp; + struct hlist_node *n; hash = ip_vs_conn_hashkey_param(p, false); ct_read_lock(hash); - list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) { if (cp->af == p->af && p->cport == cp->cport && p->vport == cp->vport && ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && @@ -345,12 +346,13 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) { unsigned hash; struct ip_vs_conn *cp; + struct hlist_node *n; hash = ip_vs_conn_hashkey_param(p, false); ct_read_lock(hash); - list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) { if (!ip_vs_conn_net_eq(cp, p->net)) continue; if (p->pe_data && p->pe->ct_match) { @@ -394,6 +396,7 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) { unsigned hash; struct ip_vs_conn *cp, *ret=NULL; + struct hlist_node *n; /* * Check for "full" addressed entries @@ -402,7 +405,7 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) ct_read_lock(hash); - list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) { if (cp->af == p->af && p->vport == cp->cport && p->cport == cp->dport && ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && @@ -818,7 +821,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, return NULL; } - 
INIT_LIST_HEAD(&cp->c_list); + INIT_HLIST_NODE(&cp->c_list); setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); ip_vs_conn_net_set(cp, p->net); cp->af = p->af; @@ -894,8 +897,8 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, */ #ifdef CONFIG_PROC_FS struct ip_vs_iter_state { - struct seq_net_private p; - struct list_head *l; + struct seq_net_private p; + struct hlist_head *l; }; static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) @@ -903,13 +906,14 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) int idx; struct ip_vs_conn *cp; struct ip_vs_iter_state *iter = seq->private; + struct hlist_node *n; for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { ct_read_lock_bh(idx); - list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { + hlist_for_each_entry(cp, n, &ip_vs_conn_tab[idx], c_list) { if (pos-- == 0) { iter->l = &ip_vs_conn_tab[idx]; - return cp; + return cp; } } ct_read_unlock_bh(idx); @@ -930,7 +934,8 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ip_vs_conn *cp = v; struct ip_vs_iter_state *iter = seq->private; - struct list_head *e, *l = iter->l; + struct hlist_node *e; + struct hlist_head *l = iter->l; int idx; ++*pos; @@ -938,15 +943,15 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) return ip_vs_conn_array(seq, 0); /* more on same hash chain? 
*/ - if ((e = cp->c_list.next) != l) - return list_entry(e, struct ip_vs_conn, c_list); + if ((e = cp->c_list.next)) + return hlist_entry(e, struct ip_vs_conn, c_list); idx = l - ip_vs_conn_tab; ct_read_unlock_bh(idx); while (++idx < ip_vs_conn_tab_size) { ct_read_lock_bh(idx); - list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { + hlist_for_each_entry(cp, e, &ip_vs_conn_tab[idx], c_list) { iter->l = &ip_vs_conn_tab[idx]; return cp; } @@ -959,7 +964,7 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v) { struct ip_vs_iter_state *iter = seq->private; - struct list_head *l = iter->l; + struct hlist_head *l = iter->l; if (l) ct_read_unlock_bh(l - ip_vs_conn_tab); @@ -1148,13 +1153,14 @@ void ip_vs_random_dropentry(struct net *net) */ for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) { unsigned hash = net_random() & ip_vs_conn_tab_mask; + struct hlist_node *n; /* * Lock is actually needed in this loop. */ ct_write_lock_bh(hash); - list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { + hlist_for_each_entry(cp, n, &ip_vs_conn_tab[hash], c_list) { if (cp->flags & IP_VS_CONN_F_TEMPLATE) /* connection template */ continue; @@ -1202,12 +1208,14 @@ static void ip_vs_conn_flush(struct net *net) flush_again: for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { + struct hlist_node *n; + /* * Lock is actually needed in this loop. 
*/ ct_write_lock_bh(idx); - list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { + hlist_for_each_entry(cp, n, &ip_vs_conn_tab[idx], c_list) { if (!ip_vs_conn_net_eq(cp, net)) continue; IP_VS_DBG(4, "del connection\n"); @@ -1265,8 +1273,7 @@ int __init ip_vs_conn_init(void) /* * Allocate the connection hash table and initialize its list heads */ - ip_vs_conn_tab = vmalloc(ip_vs_conn_tab_size * - sizeof(struct list_head)); + ip_vs_conn_tab = vmalloc(ip_vs_conn_tab_size * sizeof(*ip_vs_conn_tab)); if (!ip_vs_conn_tab) return -ENOMEM; @@ -1286,9 +1293,8 @@ int __init ip_vs_conn_init(void) IP_VS_DBG(0, "Each connection entry needs %Zd bytes at least\n", sizeof(struct ip_vs_conn)); - for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { - INIT_LIST_HEAD(&ip_vs_conn_tab[idx]); - } + for (idx = 0; idx < ip_vs_conn_tab_size; idx++) + INIT_HLIST_HEAD(&ip_vs_conn_tab[idx]); for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++) { rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); -- cgit v1.1 From b7440a14f28492bac30d7d43fd982fd210c6e971 Mon Sep 17 00:00:00 2001 From: Anand Gadiyar Date: Tue, 22 Feb 2011 12:43:09 +0530 Subject: Bluetooth: fix build break on hci_sock.c Linux-next as of 20110217 complains when building for OMAP1. LD vmlinux `hci_sock_cleanup' referenced in section `.init.text' of net/built-in.o: defined in discarded section `.exit.text' of net/built-in.o `hci_sock_cleanup' referenced in section `.init.text' of net/built-in.o: defined in discarded section `.exit.text' of net/built-in.o make: *** [vmlinux] Error 1 A recent patch by Gustavo (Bluetooth: Merge L2CAP and SCO modules into bluetooth.ko) introduced this by calling the hci_sock_cleanup function in the error path of bt_init. Fix this by dropping the __exit marking for hci_sock_cleanup. Signed-off-by: Anand Gadiyar Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/hci_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index d50e961..295e4a8 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -861,7 +861,7 @@ error: return err; } -void __exit hci_sock_cleanup(void) +void hci_sock_cleanup(void) { if (bt_sock_unregister(BTPROTO_HCI) < 0) BT_ERR("HCI socket unregistration failed"); -- cgit v1.1 From eaefd1105bc431ef329599e307a07f2a36ae7872 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 18 Feb 2011 03:26:36 +0000 Subject: net: add __rcu annotations to sk_wq and wq Add proper RCU annotations/verbs to sk_wq and wq members Fix __sctp_write_space() sk_sleep() abuse (and sock->wq access) Fix sunrpc sk_sleep() abuse too Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sctp/socket.c | 9 +++++---- net/socket.c | 23 ++++++++++++++--------- net/sunrpc/svcsock.c | 32 ++++++++++++++++++++------------ net/unix/af_unix.c | 2 +- 4 files changed, 40 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 8e02550..b53b2eb 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6102,15 +6102,16 @@ static void __sctp_write_space(struct sctp_association *asoc) wake_up_interruptible(&asoc->wait); if (sctp_writeable(sk)) { - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible(sk_sleep(sk)); + wait_queue_head_t *wq = sk_sleep(sk); + + if (wq && waitqueue_active(wq)) + wake_up_interruptible(wq); /* Note that we try to include the Async I/O support * here by modeling from the current TCP/UDP code. * We have not tested with it yet. 
*/ - if (sock->wq->fasync_list && - !(sk->sk_shutdown & SEND_SHUTDOWN)) + if (!(sk->sk_shutdown & SEND_SHUTDOWN)) sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); } diff --git a/net/socket.c b/net/socket.c index ac2219f..9fa1e3b 100644 --- a/net/socket.c +++ b/net/socket.c @@ -240,17 +240,19 @@ static struct kmem_cache *sock_inode_cachep __read_mostly; static struct inode *sock_alloc_inode(struct super_block *sb) { struct socket_alloc *ei; + struct socket_wq *wq; ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); if (!ei) return NULL; - ei->socket.wq = kmalloc(sizeof(struct socket_wq), GFP_KERNEL); - if (!ei->socket.wq) { + wq = kmalloc(sizeof(*wq), GFP_KERNEL); + if (!wq) { kmem_cache_free(sock_inode_cachep, ei); return NULL; } - init_waitqueue_head(&ei->socket.wq->wait); - ei->socket.wq->fasync_list = NULL; + init_waitqueue_head(&wq->wait); + wq->fasync_list = NULL; + RCU_INIT_POINTER(ei->socket.wq, wq); ei->socket.state = SS_UNCONNECTED; ei->socket.flags = 0; @@ -273,9 +275,11 @@ static void wq_free_rcu(struct rcu_head *head) static void sock_destroy_inode(struct inode *inode) { struct socket_alloc *ei; + struct socket_wq *wq; ei = container_of(inode, struct socket_alloc, vfs_inode); - call_rcu(&ei->socket.wq->rcu, wq_free_rcu); + wq = rcu_dereference_protected(ei->socket.wq, 1); + call_rcu(&wq->rcu, wq_free_rcu); kmem_cache_free(sock_inode_cachep, ei); } @@ -524,7 +528,7 @@ void sock_release(struct socket *sock) module_put(owner); } - if (sock->wq->fasync_list) + if (rcu_dereference_protected(sock->wq, 1)->fasync_list) printk(KERN_ERR "sock_release: fasync list not empty!\n"); percpu_sub(sockets_in_use, 1); @@ -1108,15 +1112,16 @@ static int sock_fasync(int fd, struct file *filp, int on) { struct socket *sock = filp->private_data; struct sock *sk = sock->sk; + struct socket_wq *wq; if (sk == NULL) return -EINVAL; lock_sock(sk); + wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk)); + fasync_helper(fd, filp, on, &wq->fasync_list); - 
fasync_helper(fd, filp, on, &sock->wq->fasync_list); - - if (!sock->wq->fasync_list) + if (!wq->fasync_list) sock_reset_flag(sk, SOCK_FASYNC); else sock_set_flag(sk, SOCK_FASYNC); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index d802e94..b7d435c 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -420,6 +420,7 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, static void svc_udp_data_ready(struct sock *sk, int count) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; + wait_queue_head_t *wq = sk_sleep(sk); if (svsk) { dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", @@ -428,8 +429,8 @@ static void svc_udp_data_ready(struct sock *sk, int count) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible(sk_sleep(sk)); + if (wq && waitqueue_active(wq)) + wake_up_interruptible(wq); } /* @@ -438,6 +439,7 @@ static void svc_udp_data_ready(struct sock *sk, int count) static void svc_write_space(struct sock *sk) { struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); + wait_queue_head_t *wq = sk_sleep(sk); if (svsk) { dprintk("svc: socket %p(inet %p), write_space busy=%d\n", @@ -445,10 +447,10 @@ static void svc_write_space(struct sock *sk) svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) { + if (wq && waitqueue_active(wq)) { dprintk("RPC svc_write_space: someone sleeping on %p\n", svsk); - wake_up_interruptible(sk_sleep(sk)); + wake_up_interruptible(wq); } } @@ -739,6 +741,7 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; + wait_queue_head_t *wq; dprintk("svc: socket %p TCP (listen) state change %d\n", sk, sk->sk_state); @@ -761,8 +764,9 @@ static void svc_tcp_listen_data_ready(struct sock *sk, int 
count_unused) printk("svc: socket %p: no user data\n", sk); } - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible_all(sk_sleep(sk)); + wq = sk_sleep(sk); + if (wq && waitqueue_active(wq)) + wake_up_interruptible_all(wq); } /* @@ -771,6 +775,7 @@ static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) static void svc_tcp_state_change(struct sock *sk) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; + wait_queue_head_t *wq = sk_sleep(sk); dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n", sk, sk->sk_state, sk->sk_user_data); @@ -781,13 +786,14 @@ static void svc_tcp_state_change(struct sock *sk) set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible_all(sk_sleep(sk)); + if (wq && waitqueue_active(wq)) + wake_up_interruptible_all(wq); } static void svc_tcp_data_ready(struct sock *sk, int count) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; + wait_queue_head_t *wq = sk_sleep(sk); dprintk("svc: socket %p TCP data ready (svsk %p)\n", sk, sk->sk_user_data); @@ -795,8 +801,8 @@ static void svc_tcp_data_ready(struct sock *sk, int count) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible(sk_sleep(sk)); + if (wq && waitqueue_active(wq)) + wake_up_interruptible(wq); } /* @@ -1531,6 +1537,7 @@ static void svc_sock_detach(struct svc_xprt *xprt) { struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); struct sock *sk = svsk->sk_sk; + wait_queue_head_t *wq; dprintk("svc: svc_sock_detach(%p)\n", svsk); @@ -1539,8 +1546,9 @@ static void svc_sock_detach(struct svc_xprt *xprt) sk->sk_data_ready = svsk->sk_odata; sk->sk_write_space = svsk->sk_owspace; - if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) - wake_up_interruptible(sk_sleep(sk)); + wq = sk_sleep(sk); + if 
(wq && waitqueue_active(wq)) + wake_up_interruptible(wq); } /* diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d8d98d5..217fb7f 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1171,7 +1171,7 @@ restart: newsk->sk_type = sk->sk_type; init_peercred(newsk); newu = unix_sk(newsk); - newsk->sk_wq = &newu->peer_wq; + RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); otheru = unix_sk(other); /* copy address information from listening to new sock*/ -- cgit v1.1 From 86fce3ba1e731cf6d97a4157a192ffa60dc7ec0b Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Sun, 20 Feb 2011 16:14:23 +0000 Subject: cls_u32: fix sparse warnings The variable _data is used in asm-generic to define sections which causes sparse warnings, so just rename the variable. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 966920c..3b93fc0 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -134,12 +134,12 @@ next_knode: for (i = n->sel.nkeys; i > 0; i--, key++) { int toff = off + key->off + (off2 & key->offmask); - __be32 *data, _data; + __be32 *data, hdata; if (skb_headroom(skb) + toff > INT_MAX) goto out; - data = skb_header_pointer(skb, toff, 4, &_data); + data = skb_header_pointer(skb, toff, 4, &hdata); if (!data) goto out; if ((*data ^ key->val) & key->mask) { @@ -187,10 +187,10 @@ check_terminal: ht = n->ht_down; sel = 0; if (ht->divisor) { - __be32 *data, _data; + __be32 *data, hdata; data = skb_header_pointer(skb, off + n->sel.hoff, 4, - &_data); + &hdata); if (!data) goto out; sel = ht->divisor & u32_hash_fold(*data, &n->sel, @@ -202,11 +202,11 @@ check_terminal: if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) { off2 = n->sel.off + 3; if (n->sel.flags & TC_U32_VAROFFSET) { - __be16 *data, _data; + __be16 *data, hdata; data = skb_header_pointer(skb, off + n->sel.offoff, - 
2, &_data); + 2, &hdata); if (!data) goto out; off2 += ntohs(n->sel.offmask & *data) >> -- cgit v1.1 From 05d8402576c9c1b85bfc9e4f9d6a21c27ccbd5b1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 22 Feb 2011 17:47:10 -0800 Subject: xfrm: Mark flowi arg to ->get_tos() const. Signed-off-by: David S. Miller --- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/xfrm6_policy.c | 2 +- net/xfrm/xfrm_policy.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 19fbdec..ef12e68 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -56,7 +56,7 @@ static int xfrm4_get_saddr(struct net *net, return 0; } -static int xfrm4_get_tos(struct flowi *fl) +static int xfrm4_get_tos(const struct flowi *fl) { return IPTOS_RT_MASK & fl->fl4_tos; /* Strip ECN bits */ } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 834dc02..753e9a1 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -67,7 +67,7 @@ static int xfrm6_get_saddr(struct net *net, return 0; } -static int xfrm6_get_tos(struct flowi *fl) +static int xfrm6_get_tos(const struct flowi *fl) { return 0; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7a8e2c7..f8ccb97 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1256,7 +1256,7 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, * still valid. */ -static inline int xfrm_get_tos(struct flowi *fl, int family) +static inline int xfrm_get_tos(const struct flowi *fl, int family) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); int tos; -- cgit v1.1 From 0c7b3eefb4ab8df245e94feb0d83c1c3450a3d87 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 22 Feb 2011 17:48:57 -0800 Subject: xfrm: Mark flowi arg to ->fill_dst() const. Signed-off-by: David S. 
Miller --- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/xfrm6_policy.c | 2 +- net/xfrm/xfrm_policy.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index ef12e68..1e9844d 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -68,7 +68,7 @@ static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, } static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, - struct flowi *fl) + const struct flowi *fl) { struct rtable *rt = (struct rtable *)xdst->route; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 753e9a1..f2fa904 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -87,7 +87,7 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, } static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, - struct flowi *fl) + const struct flowi *fl) { struct rt6_info *rt = (struct rt6_info*)xdst->route; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f8ccb97..fa0b7f3 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1369,7 +1369,7 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, } static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, - struct flowi *fl) + const struct flowi *fl) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(xdst->u.dst.ops->family); -- cgit v1.1 From 73e5ebb20f2809e2eb0b904448481e010c2599d7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 22 Feb 2011 17:51:44 -0800 Subject: xfrm: Mark flowi arg to ->init_tempsel() const. Signed-off-by: David S. 
Miller --- net/ipv4/xfrm4_state.c | 2 +- net/ipv6/xfrm6_state.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 4794762..19eb560 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -21,7 +21,7 @@ static int xfrm4_init_flags(struct xfrm_state *x) } static void -__xfrm4_init_tempsel(struct xfrm_selector *sel, struct flowi *fl) +__xfrm4_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl) { sel->daddr.a4 = fl->fl4_dst; sel->saddr.a4 = fl->fl4_src; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index a67575d..68a14c0 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -20,7 +20,7 @@ #include static void -__xfrm6_init_tempsel(struct xfrm_selector *sel, struct flowi *fl) +__xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl) { /* Initialize temporary selector matching only * to current session. */ -- cgit v1.1 From 8f029de281b26ec9fd5cd77294db1d35d9876f1a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 22 Feb 2011 17:59:59 -0800 Subject: xfrm: Mark flowi arg to xfrm_type->reject() const. Signed-off-by: David S. 
Miller --- net/ipv6/mip6.c | 3 ++- net/xfrm/xfrm_policy.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index d6e9599..f3e3ca9 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -203,7 +203,8 @@ static inline int mip6_report_rl_allow(struct timeval *stamp, return allow; } -static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct flowi *fl) +static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, + const struct flowi *fl) { struct net *net = xs_net(x); struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index fa0b7f3..ccd47cf 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1907,7 +1907,7 @@ int xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, EXPORT_SYMBOL(xfrm_lookup); static inline int -xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl) +xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl) { struct xfrm_state *x; -- cgit v1.1 From e1ad2ab2cf0cabcd81861e2c61870fc27bb27ded Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 22 Feb 2011 18:07:39 -0800 Subject: xfrm: Mark flowi arg to xfrm_selector_match() const. Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_policy.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ccd47cf..71e6dc2 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -58,7 +58,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir); static inline int -__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl) +__xfrm4_selector_match(struct xfrm_selector *sel, const struct flowi *fl) { return addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) && addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) && @@ -69,7 +69,7 @@ __xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl) } static inline int -__xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl) +__xfrm6_selector_match(struct xfrm_selector *sel, const struct flowi *fl) { return addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) && addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) && @@ -79,8 +79,8 @@ __xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl) (fl->oif == sel->ifindex || !sel->ifindex); } -int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl, - unsigned short family) +int xfrm_selector_match(struct xfrm_selector *sel, const struct flowi *fl, + unsigned short family) { switch (family) { case AF_INET: -- cgit v1.1 From 4a08ab0fe424925352729f1c99b39b1ed876fb14 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 22 Feb 2011 18:21:31 -0800 Subject: xfrm: Mark flowi arg to xfrm_state_look_at() const. Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 220ebc0..bffe83d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -746,7 +746,7 @@ static void xfrm_hash_grow_check(struct net *net, int have_hash_collision) } static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x, - struct flowi *fl, unsigned short family, + const struct flowi *fl, unsigned short family, xfrm_address_t *daddr, xfrm_address_t *saddr, struct xfrm_state **best, int *acq_in_progress, int *error) -- cgit v1.1 From 1a898592b2bde7b109e121ccb7498d40396fb5c7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 22 Feb 2011 18:22:34 -0800 Subject: xfrm: Mark flowi arg to xfrm_init_tempstate() const. Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index bffe83d..674f278 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -656,7 +656,7 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si) EXPORT_SYMBOL(xfrm_sad_getinfo); static int -xfrm_init_tempstate(struct xfrm_state *x, struct flowi *fl, +xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl, struct xfrm_tmpl *tmpl, xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family) -- cgit v1.1 From b520e9f616f4f29c8d2557ba704b74ce6d79ff07 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 22 Feb 2011 18:24:19 -0800 Subject: xfrm: Mark flowi arg to xfrm_state_find() const. Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 674f278..30a0f17 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -785,7 +785,7 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x, struct xfrm_state * xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, - struct flowi *fl, struct xfrm_tmpl *tmpl, + const struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm_policy *pol, int *err, unsigned short family) { -- cgit v1.1 From 47209abd7925acb3f61ae59884247b612b8904c8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 22 Feb 2011 18:29:20 -0800 Subject: xfrm: Kill strict arg to xfrm_bundle_ok(). Always set to "0". Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 71e6dc2..1e11398 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -51,7 +51,7 @@ static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); static void xfrm_init_pmtu(struct dst_entry *dst); static int stale_bundle(struct dst_entry *dst); static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst, - struct flowi *fl, int family, int strict); + const struct flowi *fl, int family); static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, @@ -2210,7 +2210,7 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) static int stale_bundle(struct dst_entry *dst) { - return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0); + return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC); } void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) @@ -2283,7 +2283,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst) */ static int xfrm_bundle_ok(struct xfrm_policy 
*pol, struct xfrm_dst *first, - struct flowi *fl, int family, int strict) + const struct flowi *fl, int family) { struct dst_entry *dst = &first->u.dst; struct xfrm_dst *last; @@ -2320,11 +2320,6 @@ static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, xdst->policy_genid != atomic_read(&xdst->pols[0]->genid)) return 0; - if (strict && fl && - !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) && - !xfrm_state_addr_flow_check(dst->xfrm, fl, family)) - return 0; - mtu = dst_mtu(dst->child); if (xdst->child_mtu_cached != mtu) { last = xdst; -- cgit v1.1 From 062cdb43b8a8de888a6e2abd31228163cc5d8ee1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 22 Feb 2011 18:31:08 -0800 Subject: xfrm: Mark flowi arg to xfrm_policy_{lookup_by_type,match}() const. Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 1e11398..4a5092a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -864,7 +864,7 @@ EXPORT_SYMBOL(xfrm_policy_walk_done); * * Returns 0 if policy found, else an -errno. */ -static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, +static int xfrm_policy_match(struct xfrm_policy *pol, const struct flowi *fl, u8 type, u16 family, int dir) { struct xfrm_selector *sel = &pol->selector; @@ -884,7 +884,7 @@ static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, } static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, - struct flowi *fl, + const struct flowi *fl, u16 family, u8 dir) { int err; -- cgit v1.1 From 73ff93cd0249e822c4fee367e1fd4ad4a45a5515 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 22 Feb 2011 18:33:42 -0800 Subject: xfrm: Mark flowi arg to xfrm_expand_policies() const. Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 4a5092a..84e4f74 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -941,7 +941,7 @@ fail: } static struct xfrm_policy * -__xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir) +__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir) { #ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_policy *pol; @@ -1542,7 +1542,7 @@ xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl) #endif } -static int xfrm_expand_policies(struct flowi *fl, u16 family, +static int xfrm_expand_policies(const struct flowi *fl, u16 family, struct xfrm_policy **pols, int *num_pols, int *num_xfrms) { -- cgit v1.1 From a6c2e611152fcdc67047aaa56b75b9cfc592ce71 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 22 Feb 2011 18:35:39 -0800 Subject: xfrm: Mark flowi arg to xfrm_tmpl_resolve{,_one}() const. Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 84e4f74..3d45456 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1157,9 +1157,8 @@ xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote, /* Resolve list of templates for the flow, given policy. 
*/ static int -xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, - struct xfrm_state **xfrm, - unsigned short family) +xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, + struct xfrm_state **xfrm, unsigned short family) { struct net *net = xp_net(policy); int nx; @@ -1214,9 +1213,8 @@ fail: } static int -xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, - struct xfrm_state **xfrm, - unsigned short family) +xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl, + struct xfrm_state **xfrm, unsigned short family) { struct xfrm_state *tp[XFRM_MAX_DEPTH]; struct xfrm_state **tpp = (npols > 1) ? tp : xfrm; -- cgit v1.1 From 98313adaac2bdaeab0b60fb3c6bfc94dd6704d6f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 22 Feb 2011 18:36:50 -0800 Subject: xfrm: Mark flowi arg to xfrm_bundle_create() const. Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 3d45456..a558dc7 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1390,7 +1390,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx, - struct flowi *fl, + const struct flowi *fl, struct dst_entry *dst) { struct net *net = xp_net(policy); -- cgit v1.1 From 3f0e18fb0e33784525322e51cbfa10369cebd912 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 22 Feb 2011 18:38:14 -0800 Subject: xfrm: Mark flowi arg to xfrm_dst_{alloc_copy,update_origin}() const. Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index a558dc7..21d29e7b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1506,7 +1506,7 @@ free_dst: } static int inline -xfrm_dst_alloc_copy(void **target, void *src, int size) +xfrm_dst_alloc_copy(void **target, const void *src, int size) { if (!*target) { *target = kmalloc(size, GFP_ATOMIC); @@ -1530,7 +1530,7 @@ xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel) } static int inline -xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl) +xfrm_dst_update_origin(struct dst_entry *dst, const struct flowi *fl) { #ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_dst *xdst = (struct xfrm_dst *)dst; -- cgit v1.1 From 4ca2e685114c55e6777022a46849795d2aa1d31a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 22 Feb 2011 18:38:51 -0800 Subject: xfrm: Mark flowi arg to xfrm_resolve_and_create_bundle() const. Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 21d29e7b..ef899a8 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1586,7 +1586,7 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family, static struct xfrm_dst * xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, - struct flowi *fl, u16 family, + const struct flowi *fl, u16 family, struct dst_entry *dst_orig) { struct net *net = xp_net(pols[0]); -- cgit v1.1 From dee9f4bceb5fd9dbfcc1567148fccdbf16d6a38a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 22 Feb 2011 18:44:31 -0800 Subject: net: Make flow cache paths use a const struct flowi. Signed-off-by: David S. 
Miller --- net/core/flow.c | 14 +++++++------- net/xfrm/xfrm_policy.c | 13 ++++++++----- 2 files changed, 15 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/core/flow.c b/net/core/flow.c index 127c8a7..990703b 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -172,9 +172,9 @@ static void flow_new_hash_rnd(struct flow_cache *fc, static u32 flow_hash_code(struct flow_cache *fc, struct flow_cache_percpu *fcp, - struct flowi *key) + const struct flowi *key) { - u32 *k = (u32 *) key; + const u32 *k = (const u32 *) key; return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) & (flow_cache_hash_size(fc) - 1); @@ -186,17 +186,17 @@ typedef unsigned long flow_compare_t; * important assumptions that we can here, such as alignment and * constant size. */ -static int flow_key_compare(struct flowi *key1, struct flowi *key2) +static int flow_key_compare(const struct flowi *key1, const struct flowi *key2) { - flow_compare_t *k1, *k1_lim, *k2; + const flow_compare_t *k1, *k1_lim, *k2; const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t); BUILD_BUG_ON(sizeof(struct flowi) % sizeof(flow_compare_t)); - k1 = (flow_compare_t *) key1; + k1 = (const flow_compare_t *) key1; k1_lim = k1 + n_elem; - k2 = (flow_compare_t *) key2; + k2 = (const flow_compare_t *) key2; do { if (*k1++ != *k2++) @@ -207,7 +207,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) } struct flow_cache_object * -flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, +flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver, void *ctx) { struct flow_cache *fc = &flow_cache_global; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ef899a8..28c865a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -954,7 +954,7 @@ __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir } static struct flow_cache_object * -xfrm_policy_lookup(struct net 
*net, struct flowi *fl, u16 family, +xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct flow_cache_object *old_obj, void *ctx) { struct xfrm_policy *pol; @@ -990,7 +990,8 @@ static inline int policy_to_flow_dir(int dir) } } -static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl) +static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, + const struct flowi *fl) { struct xfrm_policy *pol; @@ -1629,7 +1630,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, } static struct flow_cache_object * -xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir, +xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct flow_cache_object *oldflo, void *ctx) { struct dst_entry *dst_orig = (struct dst_entry *)ctx; @@ -1733,7 +1734,8 @@ error: * At the moment we eat a raw IP route. Mostly to speed up lookups * on interfaces with disabled IPsec. */ -int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, +int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, + const struct flowi *fl, struct sock *sk, int flags) { struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; @@ -1889,7 +1891,8 @@ dropdst: } EXPORT_SYMBOL(__xfrm_lookup); -int xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, +int xfrm_lookup(struct net *net, struct dst_entry **dst_p, + const struct flowi *fl, struct sock *sk, int flags) { int err = __xfrm_lookup(net, dst_p, fl, sk, flags); -- cgit v1.1 From f3e85b9edeaf8ad0446a37a40c873f3f8898c57d Mon Sep 17 00:00:00 2001 From: Vivek Natarajan Date: Wed, 23 Feb 2011 13:04:32 +0530 Subject: mac80211: Fix a race on enabling power save. There is a race on sending a data frame before the tx completion of nullfunc frame for enabling power save. As the data quickly follows the nullfunc frame, the AP thinks that the station is out of power save and continues to send the frames. 
Whereas in the station, the nullfunc ack will be processed after the tx completion of data frame and mac80211 goes to powersave. Thus the power save state mismatch between the station and the AP causes some data loss and some applications fail because of that. This patch fixes this issue. Signed-off-by: Vivek Natarajan Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 14 +++++++++++++- net/mac80211/status.c | 2 -- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7b3f9df..abb0116 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -738,9 +738,19 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) return; if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) && - (!(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED))) + (!(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED))) { + netif_tx_stop_all_queues(sdata->dev); + /* + * Flush all the frames queued in the driver before + * going to power save + */ + drv_flush(local, false); ieee80211_send_nullfunc(local, sdata, 1); + /* Flush once again to get the tx status of nullfunc frame */ + drv_flush(local, false); + } + if (!((local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) && (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) || (ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) { @@ -748,6 +758,8 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) local->hw.conf.flags |= IEEE80211_CONF_PS; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); } + + netif_tx_start_all_queues(sdata->dev); } void ieee80211_dynamic_ps_timer(unsigned long data) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 010a559..8651851 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -318,8 +318,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (info->flags & IEEE80211_TX_STAT_ACK) { local->ps_sdata->u.mgd.flags |= IEEE80211_STA_NULLFUNC_ACKED; - 
ieee80211_queue_work(&local->hw, - &local->dynamic_ps_enable_work); } else mod_timer(&local->dynamic_ps_timer, jiffies + msecs_to_jiffies(10)); -- cgit v1.1 From 6ebacbb79d2d05978ba50a24d8cbe2a76ff2014c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 23 Feb 2011 15:06:08 +0100 Subject: mac80211: rename RX_FLAG_TSFT The flag isn't very descriptive -- the intention is that the driver provides a TSF timestamp at the beginning of the MPDU -- make that clearer by renaming the flag to RX_FLAG_MACTIME_MPDU. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ibss.c | 2 +- net/mac80211/rx.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index a42aa61..463271f 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -355,7 +355,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, if (memcmp(cbss->bssid, sdata->u.ibss.bssid, ETH_ALEN) == 0) goto put_bss; - if (rx_status->flag & RX_FLAG_TSFT) { + if (rx_status->flag & RX_FLAG_MACTIME_MPDU) { /* * For correct IBSS merging we need mactime; since mactime is * defined as the time the first data symbol of the frame hits diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index f502634..5b53423 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -77,7 +77,7 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local, /* always present fields */ len = sizeof(struct ieee80211_radiotap_header) + 9; - if (status->flag & RX_FLAG_TSFT) + if (status->flag & RX_FLAG_MACTIME_MPDU) len += 8; if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) len += 1; @@ -123,7 +123,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, /* the order of the following fields is important */ /* IEEE80211_RADIOTAP_TSFT */ - if (status->flag & RX_FLAG_TSFT) { + if (status->flag & RX_FLAG_MACTIME_MPDU) { put_unaligned_le64(status->mactime, pos); rthdr->it_present |= cpu_to_le32(1 << 
IEEE80211_RADIOTAP_TSFT); -- cgit v1.1 From e13e02a3c68d899169c78d9a18689bd73491d59a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 23 Feb 2011 10:56:17 +0000 Subject: net_sched: SFB flow scheduler This is the Stochastic Fair Blue scheduler, based on work from : W. Feng, D. Kandlur, D. Saha, K. Shin. Blue: A New Class of Active Queue Management Algorithms. U. Michigan CSE-TR-387-99, April 1999. http://www.thefengs.com/wuchang/blue/CSE-TR-387-99.pdf This implementation is based on work done by Juliusz Chroboczek General SFB algorithm can be found in figure 14, page 15: B[l][n] : L x N array of bins (L levels, N bins per level) enqueue() Calculate hash function values h{0}, h{1}, .. h{L-1} Update bins at each level for i = 0 to L - 1 if (B[i][h{i}].qlen > bin_size) B[i][h{i}].p_mark += p_increment; else if (B[i][h{i}].qlen == 0) B[i][h{i}].p_mark -= p_decrement; p_min = min(B[0][h{0}].p_mark ... B[L-1][h{L-1}].p_mark); if (p_min == 1.0) ratelimit(); else mark/drop with probabilty p_min; I did the adaptation of Juliusz code to meet current kernel standards, and various changes to address previous comments : http://thread.gmane.org/gmane.linux.network/90225 http://thread.gmane.org/gmane.linux.network/90375 Default flow classifier is the rxhash introduced by RPS in 2.6.35, but we can use an external flow classifier if wanted. tc qdisc add dev $DEV parent 1:11 handle 11: \ est 0.5sec 2sec sfb limit 128 tc filter add dev $DEV protocol ip parent 11: handle 3 \ flow hash keys dst divisor 1024 Notes: 1) SFB default child qdisc is pfifo_fast. It can be changed by another qdisc but a child qdisc MUST not drop a packet previously queued. This is because SFB needs to handle a dequeued packet in order to maintain its virtual queue states. pfifo_head_drop or CHOKe should not be used. 
2) ECN is enabled by default, unlike RED/CHOKe/GRED With help from Patrick McHardy & Andi Kleen Signed-off-by: Eric Dumazet CC: Juliusz Chroboczek CC: Stephen Hemminger CC: Patrick McHardy CC: Andi Kleen CC: John W. Linville Signed-off-by: David S. Miller --- net/sched/Kconfig | 11 + net/sched/Makefile | 1 + net/sched/sch_sfb.c | 709 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 721 insertions(+) create mode 100644 net/sched/sch_sfb.c (limited to 'net') diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 8c19b6e..a7a5583 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -126,6 +126,17 @@ config NET_SCH_RED To compile this code as a module, choose M here: the module will be called sch_red. +config NET_SCH_SFB + tristate "Stochastic Fair Blue (SFB)" + ---help--- + Say Y here if you want to use the Stochastic Fair Blue (SFB) + packet scheduling algorithm. + + See the top of <file:net/sched/sch_sfb.c> for more details. + + To compile this code as a module, choose M here: the + module will be called sch_sfb. 
+ config NET_SCH_SFQ tristate "Stochastic Fairness Queueing (SFQ)" ---help--- diff --git a/net/sched/Makefile b/net/sched/Makefile index 06c6cdf..2e77b8d 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_NET_SCH_RED) += sch_red.o obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o +obj-$(CONFIG_NET_SCH_SFB) += sch_sfb.o obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c new file mode 100644 index 0000000..0a833d0 --- /dev/null +++ b/net/sched/sch_sfb.c @@ -0,0 +1,709 @@ +/* + * net/sched/sch_sfb.c Stochastic Fair Blue + * + * Copyright (c) 2008-2011 Juliusz Chroboczek + * Copyright (c) 2011 Eric Dumazet + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * W. Feng, D. Kandlur, D. Saha, K. Shin. Blue: + * A New Class of Active Queue Management Algorithms. + * U. Michigan CSE-TR-387-99, April 1999. + * + * http://www.thefengs.com/wuchang/blue/CSE-TR-387-99.pdf + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * SFB uses two B[l][n] : L x N arrays of bins (L levels, N bins per level) + * This implementation uses L = 8 and N = 16 + * This permits us to split one 32bit hash (provided per packet by rxhash or + * external classifier) into 8 subhashes of 4 bits. 
+ */ +#define SFB_BUCKET_SHIFT 4 +#define SFB_NUMBUCKETS (1 << SFB_BUCKET_SHIFT) /* N bins per Level */ +#define SFB_BUCKET_MASK (SFB_NUMBUCKETS - 1) +#define SFB_LEVELS (32 / SFB_BUCKET_SHIFT) /* L */ + +/* SFB algo uses a virtual queue, named "bin" */ +struct sfb_bucket { + u16 qlen; /* length of virtual queue */ + u16 p_mark; /* marking probability */ +}; + +/* We use a double buffering right before hash change + * (Section 4.4 of SFB reference : moving hash functions) + */ +struct sfb_bins { + u32 perturbation; /* jhash perturbation */ + struct sfb_bucket bins[SFB_LEVELS][SFB_NUMBUCKETS]; +}; + +struct sfb_sched_data { + struct Qdisc *qdisc; + struct tcf_proto *filter_list; + unsigned long rehash_interval; + unsigned long warmup_time; /* double buffering warmup time in jiffies */ + u32 max; + u32 bin_size; /* maximum queue length per bin */ + u32 increment; /* d1 */ + u32 decrement; /* d2 */ + u32 limit; /* HARD maximal queue length */ + u32 penalty_rate; + u32 penalty_burst; + u32 tokens_avail; + unsigned long rehash_time; + unsigned long token_time; + + u8 slot; /* current active bins (0 or 1) */ + bool double_buffering; + struct sfb_bins bins[2]; + + struct { + u32 earlydrop; + u32 penaltydrop; + u32 bucketdrop; + u32 queuedrop; + u32 childdrop; /* drops in child qdisc */ + u32 marked; /* ECN mark */ + } stats; +}; + +/* + * Each queued skb might be hashed on one or two bins + * We store in skb_cb the two hash values. + * (A zero value means double buffering was not used) + */ +struct sfb_skb_cb { + u32 hashes[2]; +}; + +static inline struct sfb_skb_cb *sfb_skb_cb(const struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(skb->cb) < + sizeof(struct qdisc_skb_cb) + sizeof(struct sfb_skb_cb)); + return (struct sfb_skb_cb *)qdisc_skb_cb(skb)->data; +} + +/* + * If using 'internal' SFB flow classifier, hash comes from skb rxhash + * If using external classifier, hash comes from the classid. 
+ */ +static u32 sfb_hash(const struct sk_buff *skb, u32 slot) +{ + return sfb_skb_cb(skb)->hashes[slot]; +} + +/* Probabilities are coded as Q0.16 fixed-point values, + * with 0xFFFF representing 65535/65536 (almost 1.0) + * Addition and subtraction are saturating in [0, 65535] + */ +static u32 prob_plus(u32 p1, u32 p2) +{ + u32 res = p1 + p2; + + return min_t(u32, res, SFB_MAX_PROB); +} + +static u32 prob_minus(u32 p1, u32 p2) +{ + return p1 > p2 ? p1 - p2 : 0; +} + +static void increment_one_qlen(u32 sfbhash, u32 slot, struct sfb_sched_data *q) +{ + int i; + struct sfb_bucket *b = &q->bins[slot].bins[0][0]; + + for (i = 0; i < SFB_LEVELS; i++) { + u32 hash = sfbhash & SFB_BUCKET_MASK; + + sfbhash >>= SFB_BUCKET_SHIFT; + if (b[hash].qlen < 0xFFFF) + b[hash].qlen++; + b += SFB_NUMBUCKETS; /* next level */ + } +} + +static void increment_qlen(const struct sk_buff *skb, struct sfb_sched_data *q) +{ + u32 sfbhash; + + sfbhash = sfb_hash(skb, 0); + if (sfbhash) + increment_one_qlen(sfbhash, 0, q); + + sfbhash = sfb_hash(skb, 1); + if (sfbhash) + increment_one_qlen(sfbhash, 1, q); +} + +static void decrement_one_qlen(u32 sfbhash, u32 slot, + struct sfb_sched_data *q) +{ + int i; + struct sfb_bucket *b = &q->bins[slot].bins[0][0]; + + for (i = 0; i < SFB_LEVELS; i++) { + u32 hash = sfbhash & SFB_BUCKET_MASK; + + sfbhash >>= SFB_BUCKET_SHIFT; + if (b[hash].qlen > 0) + b[hash].qlen--; + b += SFB_NUMBUCKETS; /* next level */ + } +} + +static void decrement_qlen(const struct sk_buff *skb, struct sfb_sched_data *q) +{ + u32 sfbhash; + + sfbhash = sfb_hash(skb, 0); + if (sfbhash) + decrement_one_qlen(sfbhash, 0, q); + + sfbhash = sfb_hash(skb, 1); + if (sfbhash) + decrement_one_qlen(sfbhash, 1, q); +} + +static void decrement_prob(struct sfb_bucket *b, struct sfb_sched_data *q) +{ + b->p_mark = prob_minus(b->p_mark, q->decrement); +} + +static void increment_prob(struct sfb_bucket *b, struct sfb_sched_data *q) +{ + b->p_mark = prob_plus(b->p_mark, q->increment); +} + +static 
void sfb_zero_all_buckets(struct sfb_sched_data *q) +{ + memset(&q->bins, 0, sizeof(q->bins)); +} + +/* + * compute max qlen, max p_mark, and avg p_mark + */ +static u32 sfb_compute_qlen(u32 *prob_r, u32 *avgpm_r, const struct sfb_sched_data *q) +{ + int i; + u32 qlen = 0, prob = 0, totalpm = 0; + const struct sfb_bucket *b = &q->bins[q->slot].bins[0][0]; + + for (i = 0; i < SFB_LEVELS * SFB_NUMBUCKETS; i++) { + if (qlen < b->qlen) + qlen = b->qlen; + totalpm += b->p_mark; + if (prob < b->p_mark) + prob = b->p_mark; + b++; + } + *prob_r = prob; + *avgpm_r = totalpm / (SFB_LEVELS * SFB_NUMBUCKETS); + return qlen; +} + + +static void sfb_init_perturbation(u32 slot, struct sfb_sched_data *q) +{ + q->bins[slot].perturbation = net_random(); +} + +static void sfb_swap_slot(struct sfb_sched_data *q) +{ + sfb_init_perturbation(q->slot, q); + q->slot ^= 1; + q->double_buffering = false; +} + +/* Non elastic flows are allowed to use part of the bandwidth, expressed + * in "penalty_rate" packets per second, with "penalty_burst" burst + */ +static bool sfb_rate_limit(struct sk_buff *skb, struct sfb_sched_data *q) +{ + if (q->penalty_rate == 0 || q->penalty_burst == 0) + return true; + + if (q->tokens_avail < 1) { + unsigned long age = min(10UL * HZ, jiffies - q->token_time); + + q->tokens_avail = (age * q->penalty_rate) / HZ; + if (q->tokens_avail > q->penalty_burst) + q->tokens_avail = q->penalty_burst; + q->token_time = jiffies; + if (q->tokens_avail < 1) + return true; + } + + q->tokens_avail--; + return false; +} + +static bool sfb_classify(struct sk_buff *skb, struct sfb_sched_data *q, + int *qerr, u32 *salt) +{ + struct tcf_result res; + int result; + + result = tc_classify(skb, q->filter_list, &res); + if (result >= 0) { +#ifdef CONFIG_NET_CLS_ACT + switch (result) { + case TC_ACT_STOLEN: + case TC_ACT_QUEUED: + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + case TC_ACT_SHOT: + return false; + } +#endif + *salt = TC_H_MIN(res.classid); + return true; + } + return 
false; +} + +static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + + struct sfb_sched_data *q = qdisc_priv(sch); + struct Qdisc *child = q->qdisc; + int i; + u32 p_min = ~0; + u32 minqlen = ~0; + u32 r, slot, salt, sfbhash; + int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; + + if (q->rehash_interval > 0) { + unsigned long limit = q->rehash_time + q->rehash_interval; + + if (unlikely(time_after(jiffies, limit))) { + sfb_swap_slot(q); + q->rehash_time = jiffies; + } else if (unlikely(!q->double_buffering && q->warmup_time > 0 && + time_after(jiffies, limit - q->warmup_time))) { + q->double_buffering = true; + } + } + + if (q->filter_list) { + /* If using external classifiers, get result and record it. */ + if (!sfb_classify(skb, q, &ret, &salt)) + goto other_drop; + } else { + salt = skb_get_rxhash(skb); + } + + slot = q->slot; + + sfbhash = jhash_1word(salt, q->bins[slot].perturbation); + if (!sfbhash) + sfbhash = 1; + sfb_skb_cb(skb)->hashes[slot] = sfbhash; + + for (i = 0; i < SFB_LEVELS; i++) { + u32 hash = sfbhash & SFB_BUCKET_MASK; + struct sfb_bucket *b = &q->bins[slot].bins[i][hash]; + + sfbhash >>= SFB_BUCKET_SHIFT; + if (b->qlen == 0) + decrement_prob(b, q); + else if (b->qlen >= q->bin_size) + increment_prob(b, q); + if (minqlen > b->qlen) + minqlen = b->qlen; + if (p_min > b->p_mark) + p_min = b->p_mark; + } + + slot ^= 1; + sfb_skb_cb(skb)->hashes[slot] = 0; + + if (unlikely(minqlen >= q->max || sch->q.qlen >= q->limit)) { + sch->qstats.overlimits++; + if (minqlen >= q->max) + q->stats.bucketdrop++; + else + q->stats.queuedrop++; + goto drop; + } + + if (unlikely(p_min >= SFB_MAX_PROB)) { + /* Inelastic flow */ + if (q->double_buffering) { + sfbhash = jhash_1word(salt, q->bins[slot].perturbation); + if (!sfbhash) + sfbhash = 1; + sfb_skb_cb(skb)->hashes[slot] = sfbhash; + + for (i = 0; i < SFB_LEVELS; i++) { + u32 hash = sfbhash & SFB_BUCKET_MASK; + struct sfb_bucket *b = &q->bins[slot].bins[i][hash]; + + sfbhash >>= SFB_BUCKET_SHIFT; + if 
(b->qlen == 0) + decrement_prob(b, q); + else if (b->qlen >= q->bin_size) + increment_prob(b, q); + } + } + if (sfb_rate_limit(skb, q)) { + sch->qstats.overlimits++; + q->stats.penaltydrop++; + goto drop; + } + goto enqueue; + } + + r = net_random() & SFB_MAX_PROB; + + if (unlikely(r < p_min)) { + if (unlikely(p_min > SFB_MAX_PROB / 2)) { + /* If we're marking that many packets, then either + * this flow is unresponsive, or we're badly congested. + * In either case, we want to start dropping packets. + */ + if (r < (p_min - SFB_MAX_PROB / 2) * 2) { + q->stats.earlydrop++; + goto drop; + } + } + if (INET_ECN_set_ce(skb)) { + q->stats.marked++; + } else { + q->stats.earlydrop++; + goto drop; + } + } + +enqueue: + ret = qdisc_enqueue(skb, child); + if (likely(ret == NET_XMIT_SUCCESS)) { + sch->q.qlen++; + increment_qlen(skb, q); + } else if (net_xmit_drop_count(ret)) { + q->stats.childdrop++; + sch->qstats.drops++; + } + return ret; + +drop: + qdisc_drop(skb, sch); + return NET_XMIT_CN; +other_drop: + if (ret & __NET_XMIT_BYPASS) + sch->qstats.drops++; + kfree_skb(skb); + return ret; +} + +static struct sk_buff *sfb_dequeue(struct Qdisc *sch) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + struct Qdisc *child = q->qdisc; + struct sk_buff *skb; + + skb = child->dequeue(q->qdisc); + + if (skb) { + qdisc_bstats_update(sch, skb); + sch->q.qlen--; + decrement_qlen(skb, q); + } + + return skb; +} + +static struct sk_buff *sfb_peek(struct Qdisc *sch) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + struct Qdisc *child = q->qdisc; + + return child->ops->peek(child); +} + +/* No sfb_drop -- impossible since the child doesn't return the dropped skb. 
*/ + +static void sfb_reset(struct Qdisc *sch) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + + qdisc_reset(q->qdisc); + sch->q.qlen = 0; + q->slot = 0; + q->double_buffering = false; + sfb_zero_all_buckets(q); + sfb_init_perturbation(0, q); +} + +static void sfb_destroy(struct Qdisc *sch) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + + tcf_destroy_chain(&q->filter_list); + qdisc_destroy(q->qdisc); +} + +static const struct nla_policy sfb_policy[TCA_SFB_MAX + 1] = { + [TCA_SFB_PARMS] = { .len = sizeof(struct tc_sfb_qopt) }, +}; + +static const struct tc_sfb_qopt sfb_default_ops = { + .rehash_interval = 600 * MSEC_PER_SEC, + .warmup_time = 60 * MSEC_PER_SEC, + .limit = 0, + .max = 25, + .bin_size = 20, + .increment = (SFB_MAX_PROB + 500) / 1000, /* 0.1 % */ + .decrement = (SFB_MAX_PROB + 3000) / 6000, + .penalty_rate = 10, + .penalty_burst = 20, +}; + +static int sfb_change(struct Qdisc *sch, struct nlattr *opt) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + struct Qdisc *child; + struct nlattr *tb[TCA_SFB_MAX + 1]; + const struct tc_sfb_qopt *ctl = &sfb_default_ops; + u32 limit; + int err; + + if (opt) { + err = nla_parse_nested(tb, TCA_SFB_MAX, opt, sfb_policy); + if (err < 0) + return -EINVAL; + + if (tb[TCA_SFB_PARMS] == NULL) + return -EINVAL; + + ctl = nla_data(tb[TCA_SFB_PARMS]); + } + + limit = ctl->limit; + if (limit == 0) + limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1); + + child = fifo_create_dflt(sch, &pfifo_qdisc_ops, limit); + if (IS_ERR(child)) + return PTR_ERR(child); + + sch_tree_lock(sch); + + qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); + qdisc_destroy(q->qdisc); + q->qdisc = child; + + q->rehash_interval = msecs_to_jiffies(ctl->rehash_interval); + q->warmup_time = msecs_to_jiffies(ctl->warmup_time); + q->rehash_time = jiffies; + q->limit = limit; + q->increment = ctl->increment; + q->decrement = ctl->decrement; + q->max = ctl->max; + q->bin_size = ctl->bin_size; + q->penalty_rate = ctl->penalty_rate; + q->penalty_burst 
= ctl->penalty_burst; + q->tokens_avail = ctl->penalty_burst; + q->token_time = jiffies; + + q->slot = 0; + q->double_buffering = false; + sfb_zero_all_buckets(q); + sfb_init_perturbation(0, q); + sfb_init_perturbation(1, q); + + sch_tree_unlock(sch); + + return 0; +} + +static int sfb_init(struct Qdisc *sch, struct nlattr *opt) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + + q->qdisc = &noop_qdisc; + return sfb_change(sch, opt); +} + +static int sfb_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + struct nlattr *opts; + struct tc_sfb_qopt opt = { + .rehash_interval = jiffies_to_msecs(q->rehash_interval), + .warmup_time = jiffies_to_msecs(q->warmup_time), + .limit = q->limit, + .max = q->max, + .bin_size = q->bin_size, + .increment = q->increment, + .decrement = q->decrement, + .penalty_rate = q->penalty_rate, + .penalty_burst = q->penalty_burst, + }; + + sch->qstats.backlog = q->qdisc->qstats.backlog; + opts = nla_nest_start(skb, TCA_OPTIONS); + NLA_PUT(skb, TCA_SFB_PARMS, sizeof(opt), &opt); + return nla_nest_end(skb, opts); + +nla_put_failure: + nla_nest_cancel(skb, opts); + return -EMSGSIZE; +} + +static int sfb_dump_stats(struct Qdisc *sch, struct gnet_dump *d) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + struct tc_sfb_xstats st = { + .earlydrop = q->stats.earlydrop, + .penaltydrop = q->stats.penaltydrop, + .bucketdrop = q->stats.bucketdrop, + .queuedrop = q->stats.queuedrop, + .childdrop = q->stats.childdrop, + .marked = q->stats.marked, + }; + + st.maxqlen = sfb_compute_qlen(&st.maxprob, &st.avgprob, q); + + return gnet_stats_copy_app(d, &st, sizeof(st)); +} + +static int sfb_dump_class(struct Qdisc *sch, unsigned long cl, + struct sk_buff *skb, struct tcmsg *tcm) +{ + return -ENOSYS; +} + +static int sfb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, + struct Qdisc **old) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + + if (new == NULL) + new = &noop_qdisc; + + sch_tree_lock(sch); 
+ *old = q->qdisc; + q->qdisc = new; + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); + qdisc_reset(*old); + sch_tree_unlock(sch); + return 0; +} + +static struct Qdisc *sfb_leaf(struct Qdisc *sch, unsigned long arg) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + + return q->qdisc; +} + +static unsigned long sfb_get(struct Qdisc *sch, u32 classid) +{ + return 1; +} + +static void sfb_put(struct Qdisc *sch, unsigned long arg) +{ +} + +static int sfb_change_class(struct Qdisc *sch, u32 classid, u32 parentid, + struct nlattr **tca, unsigned long *arg) +{ + return -ENOSYS; +} + +static int sfb_delete(struct Qdisc *sch, unsigned long cl) +{ + return -ENOSYS; +} + +static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker) +{ + if (!walker->stop) { + if (walker->count >= walker->skip) + if (walker->fn(sch, 1, walker) < 0) { + walker->stop = 1; + return; + } + walker->count++; + } +} + +static struct tcf_proto **sfb_find_tcf(struct Qdisc *sch, unsigned long cl) +{ + struct sfb_sched_data *q = qdisc_priv(sch); + + if (cl) + return NULL; + return &q->filter_list; +} + +static unsigned long sfb_bind(struct Qdisc *sch, unsigned long parent, + u32 classid) +{ + return 0; +} + + +static const struct Qdisc_class_ops sfb_class_ops = { + .graft = sfb_graft, + .leaf = sfb_leaf, + .get = sfb_get, + .put = sfb_put, + .change = sfb_change_class, + .delete = sfb_delete, + .walk = sfb_walk, + .tcf_chain = sfb_find_tcf, + .bind_tcf = sfb_bind, + .unbind_tcf = sfb_put, + .dump = sfb_dump_class, +}; + +static struct Qdisc_ops sfb_qdisc_ops __read_mostly = { + .id = "sfb", + .priv_size = sizeof(struct sfb_sched_data), + .cl_ops = &sfb_class_ops, + .enqueue = sfb_enqueue, + .dequeue = sfb_dequeue, + .peek = sfb_peek, + .init = sfb_init, + .reset = sfb_reset, + .destroy = sfb_destroy, + .change = sfb_change, + .dump = sfb_dump, + .dump_stats = sfb_dump_stats, + .owner = THIS_MODULE, +}; + +static int __init sfb_module_init(void) +{ + return register_qdisc(&sfb_qdisc_ops); +} + 
+static void __exit sfb_module_exit(void) +{ + unregister_qdisc(&sfb_qdisc_ops); +} + +module_init(sfb_module_init) +module_exit(sfb_module_exit) + +MODULE_DESCRIPTION("Stochastic Fair Blue queue discipline"); +MODULE_AUTHOR("Juliusz Chroboczek"); +MODULE_AUTHOR("Eric Dumazet"); +MODULE_LICENSE("GPL"); -- cgit v1.1 From c3f52ae6a378398127acb845087ddb9e8b67493b Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 23 Feb 2011 09:06:48 +0000 Subject: socket: suppress sparse warnings Use __force to quiet sparse warnings for cases where the code is simulating user space pointers. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/socket.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 9fa1e3b..937d0fc 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2648,7 +2648,8 @@ static int bond_ioctl(struct net *net, unsigned int cmd, old_fs = get_fs(); set_fs(KERNEL_DS); - err = dev_ioctl(net, cmd, &kifr); + err = dev_ioctl(net, cmd, + (struct ifreq __user __force *) &kifr); set_fs(old_fs); return err; @@ -2757,7 +2758,7 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd, old_fs = get_fs(); set_fs(KERNEL_DS); - err = dev_ioctl(net, cmd, (void __user *)&ifr); + err = dev_ioctl(net, cmd, (void __user __force *)&ifr); set_fs(old_fs); if (cmd == SIOCGIFMAP && !err) { @@ -2862,7 +2863,8 @@ static int routing_ioctl(struct net *net, struct socket *sock, ret |= __get_user(rtdev, &(ur4->rt_dev)); if (rtdev) { ret |= copy_from_user(devname, compat_ptr(rtdev), 15); - r4.rt_dev = devname; devname[15] = 0; + r4.rt_dev = (char __user __force *)devname; + devname[15] = 0; } else r4.rt_dev = NULL; -- cgit v1.1 From ada440e3b5c3c155c2a4d73c433b3462086dbb4a Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 23 Feb 2011 09:06:50 +0000 Subject: afkey: add sparse annotation about rcu Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/key/af_key.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index d87c22d..60fd2f1 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3655,6 +3655,7 @@ static int pfkey_seq_show(struct seq_file *f, void *v) } static void *pfkey_seq_start(struct seq_file *f, loff_t *ppos) + __acquires(rcu) { struct net *net = seq_file_net(f); struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); @@ -3672,6 +3673,7 @@ static void *pfkey_seq_next(struct seq_file *f, void *v, loff_t *ppos) } static void pfkey_seq_stop(struct seq_file *f, void *v) + __releases(rcu) { rcu_read_unlock(); } -- cgit v1.1 From ea18fd950e3210ec6d616e2e669550dd86f74d94 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 23 Feb 2011 09:06:51 +0000 Subject: mqprio: cleanups * make qdisc_ops local * add sparse annotation about expected unlock/unlock in dump_class_stats * fix indentation Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/sched/sch_mqprio.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index ace37f9..ea17cbe 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -311,7 +311,9 @@ static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl, } static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, - struct gnet_dump *d) + struct gnet_dump *d) + __releases(d->lock) + __acquires(d->lock) { struct net_device *dev = qdisc_dev(sch); @@ -389,7 +391,7 @@ static const struct Qdisc_class_ops mqprio_class_ops = { .dump_stats = mqprio_dump_class_stats, }; -struct Qdisc_ops mqprio_qdisc_ops __read_mostly = { +static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = { .cl_ops = &mqprio_class_ops, .id = "mqprio", .priv_size = sizeof(struct mqprio_sched), -- cgit v1.1 From e0c563101a3f90ce4f4fa3df5ac803f5c50ebcc5 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 23 Feb 2011 09:06:52 +0000 Subject: em_meta: fix sparse warning gfp_t needs to be cast to integer. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/sched/em_meta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index a889d09..e5e1747 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -401,7 +401,7 @@ META_COLLECTOR(int_sk_sndbuf) META_COLLECTOR(int_sk_alloc) { SKIP_NONLOCAL(skb); - dst->value = skb->sk->sk_allocation; + dst->value = (__force int) skb->sk->sk_allocation; } META_COLLECTOR(int_sk_route_caps) -- cgit v1.1 From 8e9b59b219e520cfc2f80af471c6b0e67ad9dd75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 22 Feb 2011 16:52:28 +0000 Subject: Fix "(unregistered net_device): Features changed" message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix netdev_update_features() messages on register time by moving the call further in register_netdevice(). When netdev->reg_state != NETREG_REGISTERED, netdev_name() returns "(unregistered netdevice)" even if the dev's name is already filled. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 578415c..77e5edb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5483,8 +5483,6 @@ int register_netdevice(struct net_device *dev) if (!(dev->wanted_features & NETIF_F_SG)) dev->wanted_features &= ~NETIF_F_GSO; - netdev_update_features(dev); - /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, * vlan_dev_init() will do the dev->features check, so these features * are enabled only if supported by underlying device. @@ -5501,6 +5499,8 @@ int register_netdevice(struct net_device *dev) goto err_uninit; dev->reg_state = NETREG_REGISTERED; + netdev_update_features(dev); + /* * Default initial state at registry is that the * device is present. 
-- cgit v1.1 From 14d1232f490c1c696582909fb3b69e67a8d38a34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 22 Feb 2011 16:52:28 +0000 Subject: net: avoid initial "Features changed" message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid "Features changed" message and ndo_set_features call on device registration caused by automatic enabling of GSO and GRO. Driver should have enabled hardware offloads it set in features, so the ndo_set_features() is not needed at registration time. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/dev.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 77e5edb..69a3c08 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5476,12 +5476,14 @@ int register_netdevice(struct net_device *dev) * software offloads (GSO and GRO). */ dev->hw_features |= NETIF_F_SOFT_FEATURES; - dev->wanted_features = (dev->features & dev->hw_features) - | NETIF_F_SOFT_FEATURES; + dev->features |= NETIF_F_SOFT_FEATURES; + dev->wanted_features = dev->features & dev->hw_features; /* Avoid warning from netdev_fix_features() for GSO without SG */ - if (!(dev->wanted_features & NETIF_F_SG)) + if (!(dev->wanted_features & NETIF_F_SG)) { dev->wanted_features &= ~NETIF_F_GSO; + dev->features &= ~NETIF_F_GSO; + } /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, * vlan_dev_init() will do the dev->features check, so these features -- cgit v1.1 From 4e4db200541d49404ff39ac482efee072dd72144 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 22 Feb 2011 16:52:28 +0000 Subject: net: Fix ETHTOOL_GFEATURES compatibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement getting rx checksum state for not updated drivers. Signed-off-by: Michał Mirosław Signed-off-by: David S. 
Miller --- net/core/ethtool.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 66cdc76..69a3edc 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -168,6 +168,18 @@ EXPORT_SYMBOL(ethtool_ntuple_flush); #define ETHTOOL_DEV_FEATURE_WORDS 1 +static void ethtool_get_features_compat(struct net_device *dev, + struct ethtool_get_features_block *features) +{ + if (!dev->ethtool_ops) + return; + + /* getting RX checksum */ + if (dev->ethtool_ops->get_rx_csum) + if (dev->ethtool_ops->get_rx_csum(dev)) + features[0].active |= NETIF_F_RXCSUM; +} + static int ethtool_get_features(struct net_device *dev, void __user *useraddr) { struct ethtool_gfeatures cmd = { @@ -185,6 +197,8 @@ static int ethtool_get_features(struct net_device *dev, void __user *useraddr) u32 __user *sizeaddr; u32 copy_size; + ethtool_get_features_compat(dev, features); + sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size); if (get_user(copy_size, sizeaddr)) return -EFAULT; -- cgit v1.1 From 39fc0ce5710c53bad14aaba1a789eec810c556f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 22 Feb 2011 16:52:29 +0000 Subject: net: Implement SFEATURES compatibility for not updated drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use discrete setting ops for not updated drivers. This will not make them conform to full G/SFEATURES semantics, though. Signed-off-by: Michał Mirosław Signed-off-by: David S. 
Miller --- net/core/ethtool.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 69a3edc..c1a71bb 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -178,6 +178,64 @@ static void ethtool_get_features_compat(struct net_device *dev, if (dev->ethtool_ops->get_rx_csum) if (dev->ethtool_ops->get_rx_csum(dev)) features[0].active |= NETIF_F_RXCSUM; + + /* mark legacy-changeable features */ + if (dev->ethtool_ops->set_sg) + features[0].available |= NETIF_F_SG; + if (dev->ethtool_ops->set_tx_csum) + features[0].available |= NETIF_F_ALL_CSUM; + if (dev->ethtool_ops->set_tso) + features[0].available |= NETIF_F_ALL_TSO; + if (dev->ethtool_ops->set_rx_csum) + features[0].available |= NETIF_F_RXCSUM; + if (dev->ethtool_ops->set_flags) + features[0].available |= flags_dup_features; +} + +static int ethtool_set_feature_compat(struct net_device *dev, + int (*legacy_set)(struct net_device *, u32), + struct ethtool_set_features_block *features, u32 mask) +{ + u32 do_set; + + if (!legacy_set) + return 0; + + if (!(features[0].valid & mask)) + return 0; + + features[0].valid &= ~mask; + + do_set = !!(features[0].requested & mask); + + if (legacy_set(dev, do_set) < 0) + netdev_info(dev, + "Legacy feature change (%s) failed for 0x%08x\n", + do_set ? 
"set" : "clear", mask); + + return 1; +} + +static int ethtool_set_features_compat(struct net_device *dev, + struct ethtool_set_features_block *features) +{ + int compat; + + if (!dev->ethtool_ops) + return 0; + + compat = ethtool_set_feature_compat(dev, dev->ethtool_ops->set_sg, + features, NETIF_F_SG); + compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tx_csum, + features, NETIF_F_ALL_CSUM); + compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tso, + features, NETIF_F_ALL_TSO); + compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_rx_csum, + features, NETIF_F_RXCSUM); + compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_flags, + features, flags_dup_features); + + return compat; } static int ethtool_get_features(struct net_device *dev, void __user *useraddr) @@ -234,6 +292,9 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr) if (features[0].valid & ~NETIF_F_ETHTOOL_BITS) return -EINVAL; + if (ethtool_set_features_compat(dev, features)) + ret |= ETHTOOL_F_COMPAT; + if (features[0].valid & ~dev->hw_features) { features[0].valid &= dev->hw_features; ret |= ETHTOOL_F_UNSUPPORTED; -- cgit v1.1 From 23dd4cce387124ec3ea06ca30d17854ae4d9b772 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Fri, 7 Jan 2011 11:43:40 -0500 Subject: tipc: Combine port structure with tipc_port structure Merge two distinct structures containing information about a TIPC port into a single structure. The structures were previously kept separate so that public information about a port could be made available to applications using TIPC's native API, while the remaining information was kept private for use by TIPC itself. However, now that the native API has been removed there is no longer any need for this somewhat confusing arrangement. Since one of the structures was already embedded within the other, the change largely involves replacing instances of "publ.foo" with "foo". 
The changes do not otherwise alter the operation of TIPC ports. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/link.c | 46 +++++------ net/tipc/link.h | 6 +- net/tipc/port.c | 234 +++++++++++++++++++++++++++--------------------------- net/tipc/port.h | 67 +++++++--------- net/tipc/subscr.c | 6 +- 5 files changed, 175 insertions(+), 184 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index 18702f5..e30770d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2,7 +2,7 @@ * net/tipc/link.c: TIPC link code * * Copyright (c) 1996-2007, Ericsson AB - * Copyright (c) 2004-2007, Wind River Systems + * Copyright (c) 2004-2007, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -90,7 +90,7 @@ static void link_handle_out_of_seq_msg(struct link *l_ptr, static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf); static int link_recv_changeover_msg(struct link **l_ptr, struct sk_buff **buf); static void link_set_supervision_props(struct link *l_ptr, u32 tolerance); -static int link_send_sections_long(struct port *sender, +static int link_send_sections_long(struct tipc_port *sender, struct iovec const *msg_sect, u32 num_sect, u32 destnode); static void link_check_defragm_bufs(struct link *l_ptr); @@ -406,7 +406,7 @@ static void link_start(struct link *l_ptr) static int link_schedule_port(struct link *l_ptr, u32 origport, u32 sz) { - struct port *p_ptr; + struct tipc_port *p_ptr; spin_lock_bh(&tipc_port_list_lock); p_ptr = tipc_port_lock(origport); @@ -415,7 +415,7 @@ static int link_schedule_port(struct link *l_ptr, u32 origport, u32 sz) goto exit; if (!list_empty(&p_ptr->wait_list)) goto exit; - p_ptr->publ.congested = 1; + p_ptr->congested = 1; p_ptr->waiting_pkts = 1 + ((sz - 1) / l_ptr->max_pkt); list_add_tail(&p_ptr->wait_list, &l_ptr->waiting_ports); l_ptr->stats.link_congs++; @@ -428,8 +428,8 @@ exit: void 
tipc_link_wakeup_ports(struct link *l_ptr, int all) { - struct port *p_ptr; - struct port *temp_p_ptr; + struct tipc_port *p_ptr; + struct tipc_port *temp_p_ptr; int win = l_ptr->queue_limit[0] - l_ptr->out_queue_size; if (all) @@ -445,11 +445,11 @@ void tipc_link_wakeup_ports(struct link *l_ptr, int all) if (win <= 0) break; list_del_init(&p_ptr->wait_list); - spin_lock_bh(p_ptr->publ.lock); - p_ptr->publ.congested = 0; - p_ptr->wakeup(&p_ptr->publ); + spin_lock_bh(p_ptr->lock); + p_ptr->congested = 0; + p_ptr->wakeup(p_ptr); win -= p_ptr->waiting_pkts; - spin_unlock_bh(p_ptr->publ.lock); + spin_unlock_bh(p_ptr->lock); } exit: @@ -1027,12 +1027,12 @@ int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode) * except for total message length. * Returns user data length or errno. */ -int tipc_link_send_sections_fast(struct port *sender, +int tipc_link_send_sections_fast(struct tipc_port *sender, struct iovec const *msg_sect, const u32 num_sect, u32 destaddr) { - struct tipc_msg *hdr = &sender->publ.phdr; + struct tipc_msg *hdr = &sender->phdr; struct link *l_ptr; struct sk_buff *buf; struct tipc_node *node; @@ -1045,7 +1045,7 @@ again: * (Must not hold any locks while building message.) 
*/ - res = tipc_msg_build(hdr, msg_sect, num_sect, sender->publ.max_pkt, + res = tipc_msg_build(hdr, msg_sect, num_sect, sender->max_pkt, !sender->user_port, &buf); read_lock_bh(&tipc_net_lock); @@ -1056,7 +1056,7 @@ again: if (likely(l_ptr)) { if (likely(buf)) { res = link_send_buf_fast(l_ptr, buf, - &sender->publ.max_pkt); + &sender->max_pkt); if (unlikely(res < 0)) buf_discard(buf); exit: @@ -1075,7 +1075,7 @@ exit: if (link_congested(l_ptr) || !list_empty(&l_ptr->b_ptr->cong_links)) { res = link_schedule_port(l_ptr, - sender->publ.ref, res); + sender->ref, res); goto exit; } @@ -1084,12 +1084,12 @@ exit: * then re-try fast path or fragment the message */ - sender->publ.max_pkt = l_ptr->max_pkt; + sender->max_pkt = l_ptr->max_pkt; tipc_node_unlock(node); read_unlock_bh(&tipc_net_lock); - if ((msg_hdr_sz(hdr) + res) <= sender->publ.max_pkt) + if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt) goto again; return link_send_sections_long(sender, msg_sect, @@ -1123,14 +1123,14 @@ exit: * * Returns user data length or errno. 
*/ -static int link_send_sections_long(struct port *sender, +static int link_send_sections_long(struct tipc_port *sender, struct iovec const *msg_sect, u32 num_sect, u32 destaddr) { struct link *l_ptr; struct tipc_node *node; - struct tipc_msg *hdr = &sender->publ.phdr; + struct tipc_msg *hdr = &sender->phdr; u32 dsz = msg_data_sz(hdr); u32 max_pkt, fragm_sz, rest; struct tipc_msg fragm_hdr; @@ -1142,7 +1142,7 @@ static int link_send_sections_long(struct port *sender, again: fragm_no = 1; - max_pkt = sender->publ.max_pkt - INT_H_SIZE; + max_pkt = sender->max_pkt - INT_H_SIZE; /* leave room for tunnel header in case of link changeover */ fragm_sz = max_pkt - INT_H_SIZE; /* leave room for fragmentation header in each fragment */ @@ -1157,7 +1157,7 @@ again: tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, INT_H_SIZE, msg_destnode(hdr)); - msg_set_link_selector(&fragm_hdr, sender->publ.ref); + msg_set_link_selector(&fragm_hdr, sender->ref); msg_set_size(&fragm_hdr, max_pkt); msg_set_fragm_no(&fragm_hdr, 1); @@ -1238,13 +1238,13 @@ error: node = tipc_node_find(destaddr); if (likely(node)) { tipc_node_lock(node); - l_ptr = node->active_links[sender->publ.ref & 1]; + l_ptr = node->active_links[sender->ref & 1]; if (!l_ptr) { tipc_node_unlock(node); goto reject; } if (l_ptr->max_pkt < max_pkt) { - sender->publ.max_pkt = l_ptr->max_pkt; + sender->max_pkt = l_ptr->max_pkt; tipc_node_unlock(node); for (; buf_chain; buf_chain = buf) { buf = buf_chain->next; diff --git a/net/tipc/link.h b/net/tipc/link.h index 70967e6..85fd3bc 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -2,7 +2,7 @@ * net/tipc/link.h: Include file for TIPC link code * * Copyright (c) 1995-2006, Ericsson AB - * Copyright (c) 2004-2005, Wind River Systems + * Copyright (c) 2004-2005, 2010-2011, Wind River Systems * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -211,7 +211,7 @@ struct link { } stats; }; -struct port; +struct tipc_port; struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer, const struct tipc_media_addr *media_addr); @@ -230,7 +230,7 @@ void tipc_link_reset(struct link *l_ptr); int tipc_link_send(struct sk_buff *buf, u32 dest, u32 selector); int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf); u32 tipc_link_get_max_pkt(u32 dest, u32 selector); -int tipc_link_send_sections_fast(struct port *sender, +int tipc_link_send_sections_fast(struct tipc_port *sender, struct iovec const *msg_sect, const u32 num_sect, u32 destnode); diff --git a/net/tipc/port.c b/net/tipc/port.c index 067bab2..aff5dc0 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -2,7 +2,7 @@ * net/tipc/port.c: TIPC port code * * Copyright (c) 1992-2007, Ericsson AB - * Copyright (c) 2004-2008, Wind River Systems + * Copyright (c) 2004-2008, 2010-2011, Wind River Systems * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -54,29 +54,29 @@ static DEFINE_SPINLOCK(queue_lock); static LIST_HEAD(ports); static void port_handle_node_down(unsigned long ref); -static struct sk_buff *port_build_self_abort_msg(struct port *, u32 err); -static struct sk_buff *port_build_peer_abort_msg(struct port *, u32 err); +static struct sk_buff *port_build_self_abort_msg(struct tipc_port *, u32 err); +static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *, u32 err); static void port_timeout(unsigned long ref); -static u32 port_peernode(struct port *p_ptr) +static u32 port_peernode(struct tipc_port *p_ptr) { - return msg_destnode(&p_ptr->publ.phdr); + return msg_destnode(&p_ptr->phdr); } -static u32 port_peerport(struct port *p_ptr) +static u32 port_peerport(struct tipc_port *p_ptr) { - return msg_destport(&p_ptr->publ.phdr); + return msg_destport(&p_ptr->phdr); } -static u32 port_out_seqno(struct port *p_ptr) +static u32 port_out_seqno(struct tipc_port *p_ptr) { - return msg_transp_seqno(&p_ptr->publ.phdr); + return msg_transp_seqno(&p_ptr->phdr); } -static void port_incr_out_seqno(struct port *p_ptr) +static void port_incr_out_seqno(struct tipc_port *p_ptr) { - struct tipc_msg *m = &p_ptr->publ.phdr; + struct tipc_msg *m = &p_ptr->phdr; if (likely(!msg_routed(m))) return; @@ -94,7 +94,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, struct sk_buff *buf; struct sk_buff *ibuf = NULL; struct port_list dports = {0, NULL, }; - struct port *oport = tipc_port_deref(ref); + struct tipc_port *oport = tipc_port_deref(ref); int ext_targets; int res; @@ -103,7 +103,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, /* Create multicast message */ - hdr = &oport->publ.phdr; + hdr = &oport->phdr; msg_set_type(hdr, TIPC_MCAST_MSG); msg_set_nametype(hdr, seq->type); msg_set_namelower(hdr, seq->lower); @@ -211,7 +211,7 @@ struct tipc_port *tipc_createport_raw(void *usr_handle, void (*wakeup)(struct 
tipc_port *), const u32 importance) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct tipc_msg *msg; u32 ref; @@ -220,17 +220,17 @@ struct tipc_port *tipc_createport_raw(void *usr_handle, warn("Port creation failed, no memory\n"); return NULL; } - ref = tipc_ref_acquire(p_ptr, &p_ptr->publ.lock); + ref = tipc_ref_acquire(p_ptr, &p_ptr->lock); if (!ref) { warn("Port creation failed, reference table exhausted\n"); kfree(p_ptr); return NULL; } - p_ptr->publ.usr_handle = usr_handle; - p_ptr->publ.max_pkt = MAX_PKT_DEFAULT; - p_ptr->publ.ref = ref; - msg = &p_ptr->publ.phdr; + p_ptr->usr_handle = usr_handle; + p_ptr->max_pkt = MAX_PKT_DEFAULT; + p_ptr->ref = ref; + msg = &p_ptr->phdr; tipc_msg_init(msg, importance, TIPC_NAMED_MSG, LONG_H_SIZE, 0); msg_set_origport(msg, ref); p_ptr->last_in_seqno = 41; @@ -246,12 +246,12 @@ struct tipc_port *tipc_createport_raw(void *usr_handle, INIT_LIST_HEAD(&p_ptr->port_list); list_add_tail(&p_ptr->port_list, &ports); spin_unlock_bh(&tipc_port_list_lock); - return &(p_ptr->publ); + return p_ptr; } int tipc_deleteport(u32 ref) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct sk_buff *buf = NULL; tipc_withdraw(ref, 0, NULL); @@ -263,7 +263,7 @@ int tipc_deleteport(u32 ref) tipc_port_unlock(p_ptr); k_cancel_timer(&p_ptr->timer); - if (p_ptr->publ.connected) { + if (p_ptr->connected) { buf = port_build_peer_abort_msg(p_ptr, TIPC_ERR_NO_PORT); tipc_nodesub_unsubscribe(&p_ptr->subscription); } @@ -279,14 +279,14 @@ int tipc_deleteport(u32 ref) return 0; } -static int port_unreliable(struct port *p_ptr) +static int port_unreliable(struct tipc_port *p_ptr) { - return msg_src_droppable(&p_ptr->publ.phdr); + return msg_src_droppable(&p_ptr->phdr); } int tipc_portunreliable(u32 ref, unsigned int *isunreliable) { - struct port *p_ptr; + struct tipc_port *p_ptr; p_ptr = tipc_port_lock(ref); if (!p_ptr) @@ -298,24 +298,24 @@ int tipc_portunreliable(u32 ref, unsigned int *isunreliable) int tipc_set_portunreliable(u32 ref, unsigned 
int isunreliable) { - struct port *p_ptr; + struct tipc_port *p_ptr; p_ptr = tipc_port_lock(ref); if (!p_ptr) return -EINVAL; - msg_set_src_droppable(&p_ptr->publ.phdr, (isunreliable != 0)); + msg_set_src_droppable(&p_ptr->phdr, (isunreliable != 0)); tipc_port_unlock(p_ptr); return 0; } -static int port_unreturnable(struct port *p_ptr) +static int port_unreturnable(struct tipc_port *p_ptr) { - return msg_dest_droppable(&p_ptr->publ.phdr); + return msg_dest_droppable(&p_ptr->phdr); } int tipc_portunreturnable(u32 ref, unsigned int *isunrejectable) { - struct port *p_ptr; + struct tipc_port *p_ptr; p_ptr = tipc_port_lock(ref); if (!p_ptr) @@ -327,12 +327,12 @@ int tipc_portunreturnable(u32 ref, unsigned int *isunrejectable) int tipc_set_portunreturnable(u32 ref, unsigned int isunrejectable) { - struct port *p_ptr; + struct tipc_port *p_ptr; p_ptr = tipc_port_lock(ref); if (!p_ptr) return -EINVAL; - msg_set_dest_droppable(&p_ptr->publ.phdr, (isunrejectable != 0)); + msg_set_dest_droppable(&p_ptr->phdr, (isunrejectable != 0)); tipc_port_unlock(p_ptr); return 0; } @@ -413,10 +413,10 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err) /* send self-abort message when rejecting on a connected port */ if (msg_connected(msg)) { struct sk_buff *abuf = NULL; - struct port *p_ptr = tipc_port_lock(msg_destport(msg)); + struct tipc_port *p_ptr = tipc_port_lock(msg_destport(msg)); if (p_ptr) { - if (p_ptr->publ.connected) + if (p_ptr->connected) abuf = port_build_self_abort_msg(p_ptr, err); tipc_port_unlock(p_ptr); } @@ -429,7 +429,7 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err) return data_sz; } -int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr, +int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr, struct iovec const *msg_sect, u32 num_sect, int err) { @@ -446,13 +446,13 @@ int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr, static void port_timeout(unsigned long ref) { - struct port *p_ptr = 
tipc_port_lock(ref); + struct tipc_port *p_ptr = tipc_port_lock(ref); struct sk_buff *buf = NULL; if (!p_ptr) return; - if (!p_ptr->publ.connected) { + if (!p_ptr->connected) { tipc_port_unlock(p_ptr); return; } @@ -463,7 +463,7 @@ static void port_timeout(unsigned long ref) } else { buf = port_build_proto_msg(port_peerport(p_ptr), port_peernode(p_ptr), - p_ptr->publ.ref, + p_ptr->ref, tipc_own_addr, CONN_MANAGER, CONN_PROBE, @@ -481,7 +481,7 @@ static void port_timeout(unsigned long ref) static void port_handle_node_down(unsigned long ref) { - struct port *p_ptr = tipc_port_lock(ref); + struct tipc_port *p_ptr = tipc_port_lock(ref); struct sk_buff *buf = NULL; if (!p_ptr) @@ -492,15 +492,15 @@ static void port_handle_node_down(unsigned long ref) } -static struct sk_buff *port_build_self_abort_msg(struct port *p_ptr, u32 err) +static struct sk_buff *port_build_self_abort_msg(struct tipc_port *p_ptr, u32 err) { - u32 imp = msg_importance(&p_ptr->publ.phdr); + u32 imp = msg_importance(&p_ptr->phdr); - if (!p_ptr->publ.connected) + if (!p_ptr->connected) return NULL; if (imp < TIPC_CRITICAL_IMPORTANCE) imp++; - return port_build_proto_msg(p_ptr->publ.ref, + return port_build_proto_msg(p_ptr->ref, tipc_own_addr, port_peerport(p_ptr), port_peernode(p_ptr), @@ -512,17 +512,17 @@ static struct sk_buff *port_build_self_abort_msg(struct port *p_ptr, u32 err) } -static struct sk_buff *port_build_peer_abort_msg(struct port *p_ptr, u32 err) +static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *p_ptr, u32 err) { - u32 imp = msg_importance(&p_ptr->publ.phdr); + u32 imp = msg_importance(&p_ptr->phdr); - if (!p_ptr->publ.connected) + if (!p_ptr->connected) return NULL; if (imp < TIPC_CRITICAL_IMPORTANCE) imp++; return port_build_proto_msg(port_peerport(p_ptr), port_peernode(p_ptr), - p_ptr->publ.ref, + p_ptr->ref, tipc_own_addr, imp, TIPC_CONN_MSG, @@ -534,31 +534,31 @@ static struct sk_buff *port_build_peer_abort_msg(struct port *p_ptr, u32 err) void 
tipc_port_recv_proto_msg(struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); - struct port *p_ptr = tipc_port_lock(msg_destport(msg)); + struct tipc_port *p_ptr = tipc_port_lock(msg_destport(msg)); u32 err = TIPC_OK; struct sk_buff *r_buf = NULL; struct sk_buff *abort_buf = NULL; if (!p_ptr) { err = TIPC_ERR_NO_PORT; - } else if (p_ptr->publ.connected) { + } else if (p_ptr->connected) { if ((port_peernode(p_ptr) != msg_orignode(msg)) || (port_peerport(p_ptr) != msg_origport(msg))) { err = TIPC_ERR_NO_PORT; } else if (msg_type(msg) == CONN_ACK) { int wakeup = tipc_port_congested(p_ptr) && - p_ptr->publ.congested && + p_ptr->congested && p_ptr->wakeup; p_ptr->acked += msg_msgcnt(msg); if (tipc_port_congested(p_ptr)) goto exit; - p_ptr->publ.congested = 0; + p_ptr->congested = 0; if (!wakeup) goto exit; - p_ptr->wakeup(&p_ptr->publ); + p_ptr->wakeup(p_ptr); goto exit; } - } else if (p_ptr->publ.published) { + } else if (p_ptr->published) { err = TIPC_ERR_NO_PORT; } if (err) { @@ -596,29 +596,29 @@ exit: buf_discard(buf); } -static void port_print(struct port *p_ptr, struct print_buf *buf, int full_id) +static void port_print(struct tipc_port *p_ptr, struct print_buf *buf, int full_id) { struct publication *publ; if (full_id) tipc_printf(buf, "<%u.%u.%u:%u>:", tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr), - tipc_node(tipc_own_addr), p_ptr->publ.ref); + tipc_node(tipc_own_addr), p_ptr->ref); else - tipc_printf(buf, "%-10u:", p_ptr->publ.ref); + tipc_printf(buf, "%-10u:", p_ptr->ref); - if (p_ptr->publ.connected) { + if (p_ptr->connected) { u32 dport = port_peerport(p_ptr); u32 destnode = port_peernode(p_ptr); tipc_printf(buf, " connected to <%u.%u.%u:%u>", tipc_zone(destnode), tipc_cluster(destnode), tipc_node(destnode), dport); - if (p_ptr->publ.conn_type != 0) + if (p_ptr->conn_type != 0) tipc_printf(buf, " via {%u,%u}", - p_ptr->publ.conn_type, - p_ptr->publ.conn_instance); - } else if (p_ptr->publ.published) { + p_ptr->conn_type, + 
p_ptr->conn_instance); + } else if (p_ptr->published) { tipc_printf(buf, " bound to"); list_for_each_entry(publ, &p_ptr->publications, pport_list) { if (publ->lower == publ->upper) @@ -639,7 +639,7 @@ struct sk_buff *tipc_port_get_ports(void) struct sk_buff *buf; struct tlv_desc *rep_tlv; struct print_buf pb; - struct port *p_ptr; + struct tipc_port *p_ptr; int str_len; buf = tipc_cfg_reply_alloc(TLV_SPACE(MAX_PORT_QUERY)); @@ -650,9 +650,9 @@ struct sk_buff *tipc_port_get_ports(void) tipc_printbuf_init(&pb, TLV_DATA(rep_tlv), MAX_PORT_QUERY); spin_lock_bh(&tipc_port_list_lock); list_for_each_entry(p_ptr, &ports, port_list) { - spin_lock_bh(p_ptr->publ.lock); + spin_lock_bh(p_ptr->lock); port_print(p_ptr, &pb, 0); - spin_unlock_bh(p_ptr->publ.lock); + spin_unlock_bh(p_ptr->lock); } spin_unlock_bh(&tipc_port_list_lock); str_len = tipc_printbuf_validate(&pb); @@ -665,12 +665,12 @@ struct sk_buff *tipc_port_get_ports(void) void tipc_port_reinit(void) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct tipc_msg *msg; spin_lock_bh(&tipc_port_list_lock); list_for_each_entry(p_ptr, &ports, port_list) { - msg = &p_ptr->publ.phdr; + msg = &p_ptr->phdr; if (msg_orignode(msg) == tipc_own_addr) break; msg_set_prevnode(msg, tipc_own_addr); @@ -695,7 +695,7 @@ static void port_dispatcher_sigh(void *dummy) spin_unlock_bh(&queue_lock); while (buf) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct user_port *up_ptr; struct tipc_portid orig; struct tipc_name_seq dseq; @@ -720,8 +720,8 @@ static void port_dispatcher_sigh(void *dummy) orig.node = msg_orignode(msg); up_ptr = p_ptr->user_port; usr_handle = up_ptr->usr_handle; - connected = p_ptr->publ.connected; - published = p_ptr->publ.published; + connected = p_ptr->connected; + published = p_ptr->published; if (unlikely(msg_errcode(msg))) goto err; @@ -742,10 +742,10 @@ static void port_dispatcher_sigh(void *dummy) } else if ((msg_origport(msg) != peer_port) || (msg_orignode(msg) != peer_node)) goto reject; - if 
(unlikely(++p_ptr->publ.conn_unacked >= + if (unlikely(++p_ptr->conn_unacked >= TIPC_FLOW_CONTROL_WIN)) tipc_acknowledge(dref, - p_ptr->publ.conn_unacked); + p_ptr->conn_unacked); skb_pull(buf, msg_hdr_sz(msg)); cb(usr_handle, dref, &buf, msg_data(msg), msg_data_sz(msg)); @@ -872,7 +872,7 @@ static u32 port_dispatcher(struct tipc_port *dummy, struct sk_buff *buf) static void port_wakeup_sh(unsigned long ref) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct user_port *up_ptr; tipc_continue_event cb = NULL; void *uh = NULL; @@ -898,14 +898,14 @@ static void port_wakeup(struct tipc_port *p_ptr) void tipc_acknowledge(u32 ref, u32 ack) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct sk_buff *buf = NULL; p_ptr = tipc_port_lock(ref); if (!p_ptr) return; - if (p_ptr->publ.connected) { - p_ptr->publ.conn_unacked -= ack; + if (p_ptr->connected) { + p_ptr->conn_unacked -= ack; buf = port_build_proto_msg(port_peerport(p_ptr), port_peernode(p_ptr), ref, @@ -936,14 +936,14 @@ int tipc_createport(void *usr_handle, u32 *portref) { struct user_port *up_ptr; - struct port *p_ptr; + struct tipc_port *p_ptr; up_ptr = kmalloc(sizeof(*up_ptr), GFP_ATOMIC); if (!up_ptr) { warn("Port creation failed, no memory\n"); return -ENOMEM; } - p_ptr = (struct port *)tipc_createport_raw(NULL, port_dispatcher, + p_ptr = (struct tipc_port *)tipc_createport_raw(NULL, port_dispatcher, port_wakeup, importance); if (!p_ptr) { kfree(up_ptr); @@ -952,7 +952,7 @@ int tipc_createport(void *usr_handle, p_ptr->user_port = up_ptr; up_ptr->usr_handle = usr_handle; - up_ptr->ref = p_ptr->publ.ref; + up_ptr->ref = p_ptr->ref; up_ptr->err_cb = error_cb; up_ptr->named_err_cb = named_error_cb; up_ptr->conn_err_cb = conn_error_cb; @@ -960,26 +960,26 @@ int tipc_createport(void *usr_handle, up_ptr->named_msg_cb = named_msg_cb; up_ptr->conn_msg_cb = conn_msg_cb; up_ptr->continue_event_cb = continue_event_cb; - *portref = p_ptr->publ.ref; + *portref = p_ptr->ref; tipc_port_unlock(p_ptr); return 0; } 
int tipc_portimportance(u32 ref, unsigned int *importance) { - struct port *p_ptr; + struct tipc_port *p_ptr; p_ptr = tipc_port_lock(ref); if (!p_ptr) return -EINVAL; - *importance = (unsigned int)msg_importance(&p_ptr->publ.phdr); + *importance = (unsigned int)msg_importance(&p_ptr->phdr); tipc_port_unlock(p_ptr); return 0; } int tipc_set_portimportance(u32 ref, unsigned int imp) { - struct port *p_ptr; + struct tipc_port *p_ptr; if (imp > TIPC_CRITICAL_IMPORTANCE) return -EINVAL; @@ -987,7 +987,7 @@ int tipc_set_portimportance(u32 ref, unsigned int imp) p_ptr = tipc_port_lock(ref); if (!p_ptr) return -EINVAL; - msg_set_importance(&p_ptr->publ.phdr, (u32)imp); + msg_set_importance(&p_ptr->phdr, (u32)imp); tipc_port_unlock(p_ptr); return 0; } @@ -995,7 +995,7 @@ int tipc_set_portimportance(u32 ref, unsigned int imp) int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct publication *publ; u32 key; int res = -EINVAL; @@ -1004,7 +1004,7 @@ int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) if (!p_ptr) return -EINVAL; - if (p_ptr->publ.connected) + if (p_ptr->connected) goto exit; if (seq->lower > seq->upper) goto exit; @@ -1016,11 +1016,11 @@ int tipc_publish(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) goto exit; } publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper, - scope, p_ptr->publ.ref, key); + scope, p_ptr->ref, key); if (publ) { list_add(&publ->pport_list, &p_ptr->publications); p_ptr->pub_count++; - p_ptr->publ.published = 1; + p_ptr->published = 1; res = 0; } exit: @@ -1030,7 +1030,7 @@ exit: int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct publication *publ; struct publication *tpubl; int res = -EINVAL; @@ -1063,37 +1063,37 @@ int tipc_withdraw(u32 ref, unsigned int scope, struct tipc_name_seq const *seq) } } if 
(list_empty(&p_ptr->publications)) - p_ptr->publ.published = 0; + p_ptr->published = 0; tipc_port_unlock(p_ptr); return res; } int tipc_connect2port(u32 ref, struct tipc_portid const *peer) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct tipc_msg *msg; int res = -EINVAL; p_ptr = tipc_port_lock(ref); if (!p_ptr) return -EINVAL; - if (p_ptr->publ.published || p_ptr->publ.connected) + if (p_ptr->published || p_ptr->connected) goto exit; if (!peer->ref) goto exit; - msg = &p_ptr->publ.phdr; + msg = &p_ptr->phdr; msg_set_destnode(msg, peer->node); msg_set_destport(msg, peer->ref); msg_set_orignode(msg, tipc_own_addr); - msg_set_origport(msg, p_ptr->publ.ref); + msg_set_origport(msg, p_ptr->ref); msg_set_transp_seqno(msg, 42); msg_set_type(msg, TIPC_CONN_MSG); msg_set_hdr_sz(msg, SHORT_H_SIZE); p_ptr->probing_interval = PROBING_INTERVAL; p_ptr->probing_state = CONFIRMED; - p_ptr->publ.connected = 1; + p_ptr->connected = 1; k_start_timer(&p_ptr->timer, p_ptr->probing_interval); tipc_nodesub_subscribe(&p_ptr->subscription, peer->node, @@ -1102,7 +1102,7 @@ int tipc_connect2port(u32 ref, struct tipc_portid const *peer) res = 0; exit: tipc_port_unlock(p_ptr); - p_ptr->publ.max_pkt = tipc_link_get_max_pkt(peer->node, ref); + p_ptr->max_pkt = tipc_link_get_max_pkt(peer->node, ref); return res; } @@ -1120,7 +1120,7 @@ int tipc_disconnect_port(struct tipc_port *tp_ptr) tp_ptr->connected = 0; /* let timer expire on it's own to avoid deadlock! 
*/ tipc_nodesub_unsubscribe( - &((struct port *)tp_ptr)->subscription); + &((struct tipc_port *)tp_ptr)->subscription); res = 0; } else { res = -ENOTCONN; @@ -1135,7 +1135,7 @@ int tipc_disconnect_port(struct tipc_port *tp_ptr) int tipc_disconnect(u32 ref) { - struct port *p_ptr; + struct tipc_port *p_ptr; int res; p_ptr = tipc_port_lock(ref); @@ -1151,15 +1151,15 @@ int tipc_disconnect(u32 ref) */ int tipc_shutdown(u32 ref) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct sk_buff *buf = NULL; p_ptr = tipc_port_lock(ref); if (!p_ptr) return -EINVAL; - if (p_ptr->publ.connected) { - u32 imp = msg_importance(&p_ptr->publ.phdr); + if (p_ptr->connected) { + u32 imp = msg_importance(&p_ptr->phdr); if (imp < TIPC_CRITICAL_IMPORTANCE) imp++; buf = port_build_proto_msg(port_peerport(p_ptr), @@ -1182,13 +1182,13 @@ int tipc_shutdown(u32 ref) * message for this node. */ -static int tipc_port_recv_sections(struct port *sender, unsigned int num_sect, +static int tipc_port_recv_sections(struct tipc_port *sender, unsigned int num_sect, struct iovec const *msg_sect) { struct sk_buff *buf; int res; - res = tipc_msg_build(&sender->publ.phdr, msg_sect, num_sect, + res = tipc_msg_build(&sender->phdr, msg_sect, num_sect, MAX_MSG_SIZE, !sender->user_port, &buf); if (likely(buf)) tipc_port_recv_msg(buf); @@ -1201,15 +1201,15 @@ static int tipc_port_recv_sections(struct port *sender, unsigned int num_sect, int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect) { - struct port *p_ptr; + struct tipc_port *p_ptr; u32 destnode; int res; p_ptr = tipc_port_deref(ref); - if (!p_ptr || !p_ptr->publ.connected) + if (!p_ptr || !p_ptr->connected) return -EINVAL; - p_ptr->publ.congested = 1; + p_ptr->congested = 1; if (!tipc_port_congested(p_ptr)) { destnode = port_peernode(p_ptr); if (likely(destnode != tipc_own_addr)) @@ -1220,13 +1220,13 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect) if (likely(res != -ELINKCONG)) { 
port_incr_out_seqno(p_ptr); - p_ptr->publ.congested = 0; + p_ptr->congested = 0; p_ptr->sent++; return res; } } if (port_unreliable(p_ptr)) { - p_ptr->publ.congested = 0; + p_ptr->congested = 0; /* Just calculate msg length and return */ return tipc_msg_calc_data_size(msg_sect, num_sect); } @@ -1240,17 +1240,17 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect) int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain, unsigned int num_sect, struct iovec const *msg_sect) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct tipc_msg *msg; u32 destnode = domain; u32 destport; int res; p_ptr = tipc_port_deref(ref); - if (!p_ptr || p_ptr->publ.connected) + if (!p_ptr || p_ptr->connected) return -EINVAL; - msg = &p_ptr->publ.phdr; + msg = &p_ptr->phdr; msg_set_type(msg, TIPC_NAMED_MSG); msg_set_orignode(msg, tipc_own_addr); msg_set_origport(msg, ref); @@ -1287,15 +1287,15 @@ int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain, int tipc_send2port(u32 ref, struct tipc_portid const *dest, unsigned int num_sect, struct iovec const *msg_sect) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct tipc_msg *msg; int res; p_ptr = tipc_port_deref(ref); - if (!p_ptr || p_ptr->publ.connected) + if (!p_ptr || p_ptr->connected) return -EINVAL; - msg = &p_ptr->publ.phdr; + msg = &p_ptr->phdr; msg_set_type(msg, TIPC_DIRECT_MSG); msg_set_orignode(msg, tipc_own_addr); msg_set_origport(msg, ref); @@ -1322,15 +1322,15 @@ int tipc_send2port(u32 ref, struct tipc_portid const *dest, int tipc_send_buf2port(u32 ref, struct tipc_portid const *dest, struct sk_buff *buf, unsigned int dsz) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct tipc_msg *msg; int res; - p_ptr = (struct port *)tipc_ref_deref(ref); - if (!p_ptr || p_ptr->publ.connected) + p_ptr = (struct tipc_port *)tipc_ref_deref(ref); + if (!p_ptr || p_ptr->connected) return -EINVAL; - msg = &p_ptr->publ.phdr; + msg = &p_ptr->phdr; 
msg_set_type(msg, TIPC_DIRECT_MSG); msg_set_orignode(msg, tipc_own_addr); msg_set_origport(msg, ref); diff --git a/net/tipc/port.h b/net/tipc/port.h index 8e84b98..f8722af 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -2,7 +2,7 @@ * net/tipc/port.h: Include file for TIPC port code * * Copyright (c) 1994-2007, Ericsson AB - * Copyright (c) 2004-2007, Wind River Systems + * Copyright (c) 2004-2007, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -95,7 +95,7 @@ struct user_port { }; /** - * struct tipc_port - TIPC port info available to socket API + * struct tipc_port - TIPC port structure * @usr_handle: pointer to additional user-defined information about port * @lock: pointer to spinlock for controlling access to port * @connected: non-zero if port is currently connected to a peer port @@ -107,24 +107,6 @@ struct user_port { * @max_pkt: maximum packet size "hint" used when building messages sent by port * @ref: unique reference to port in TIPC object registry * @phdr: preformatted message header used when sending messages - */ -struct tipc_port { - void *usr_handle; - spinlock_t *lock; - int connected; - u32 conn_type; - u32 conn_instance; - u32 conn_unacked; - int published; - u32 congested; - u32 max_pkt; - u32 ref; - struct tipc_msg phdr; -}; - -/** - * struct port - TIPC port structure - * @publ: TIPC port info available to privileged users * @port_list: adjacent ports in TIPC's global list of ports * @dispatcher: ptr to routine which handles received messages * @wakeup: ptr to routine to call when port is no longer congested @@ -141,9 +123,18 @@ struct tipc_port { * @timer_ref: * @subscription: "node down" subscription used to terminate failed connections */ - -struct port { - struct tipc_port publ; +struct tipc_port { + void *usr_handle; + spinlock_t *lock; + int connected; + u32 conn_type; + u32 conn_instance; + u32 conn_unacked; + int published; + u32 congested; + u32 
max_pkt; + u32 ref; + struct tipc_msg phdr; struct list_head port_list; u32 (*dispatcher)(struct tipc_port *, struct sk_buff *); void (*wakeup)(struct tipc_port *); @@ -230,7 +221,7 @@ int tipc_send_buf2port(u32 portref, struct tipc_portid const *dest, int tipc_multicast(u32 portref, struct tipc_name_seq const *seq, unsigned int section_count, struct iovec const *msg); -int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr, +int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr, struct iovec const *msg_sect, u32 num_sect, int err); struct sk_buff *tipc_port_get_ports(void); @@ -242,9 +233,9 @@ void tipc_port_reinit(void); * tipc_port_lock - lock port instance referred to and return its pointer */ -static inline struct port *tipc_port_lock(u32 ref) +static inline struct tipc_port *tipc_port_lock(u32 ref) { - return (struct port *)tipc_ref_lock(ref); + return (struct tipc_port *)tipc_ref_lock(ref); } /** @@ -253,27 +244,27 @@ static inline struct port *tipc_port_lock(u32 ref) * Can use pointer instead of tipc_ref_unlock() since port is already locked. 
*/ -static inline void tipc_port_unlock(struct port *p_ptr) +static inline void tipc_port_unlock(struct tipc_port *p_ptr) { - spin_unlock_bh(p_ptr->publ.lock); + spin_unlock_bh(p_ptr->lock); } -static inline struct port *tipc_port_deref(u32 ref) +static inline struct tipc_port *tipc_port_deref(u32 ref) { - return (struct port *)tipc_ref_deref(ref); + return (struct tipc_port *)tipc_ref_deref(ref); } -static inline u32 tipc_peer_port(struct port *p_ptr) +static inline u32 tipc_peer_port(struct tipc_port *p_ptr) { - return msg_destport(&p_ptr->publ.phdr); + return msg_destport(&p_ptr->phdr); } -static inline u32 tipc_peer_node(struct port *p_ptr) +static inline u32 tipc_peer_node(struct tipc_port *p_ptr) { - return msg_destnode(&p_ptr->publ.phdr); + return msg_destnode(&p_ptr->phdr); } -static inline int tipc_port_congested(struct port *p_ptr) +static inline int tipc_port_congested(struct tipc_port *p_ptr) { return (p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2); } @@ -284,7 +275,7 @@ static inline int tipc_port_congested(struct port *p_ptr) static inline int tipc_port_recv_msg(struct sk_buff *buf) { - struct port *p_ptr; + struct tipc_port *p_ptr; struct tipc_msg *msg = buf_msg(buf); u32 destport = msg_destport(msg); u32 dsz = msg_data_sz(msg); @@ -299,7 +290,7 @@ static inline int tipc_port_recv_msg(struct sk_buff *buf) /* validate destination & pass to port, otherwise reject message */ p_ptr = tipc_port_lock(destport); if (likely(p_ptr)) { - if (likely(p_ptr->publ.connected)) { + if (likely(p_ptr->connected)) { if ((unlikely(msg_origport(msg) != tipc_peer_port(p_ptr))) || (unlikely(msg_orignode(msg) != tipc_peer_node(p_ptr))) || (unlikely(!msg_connected(msg)))) { @@ -308,7 +299,7 @@ static inline int tipc_port_recv_msg(struct sk_buff *buf) goto reject; } } - err = p_ptr->dispatcher(&p_ptr->publ, buf); + err = p_ptr->dispatcher(p_ptr, buf); tipc_port_unlock(p_ptr); if (likely(!err)) return dsz; diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 
ca04479..98ee50b 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -2,7 +2,7 @@ * net/tipc/subscr.c: TIPC network topology service * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005-2007, Wind River Systems + * Copyright (c) 2005-2007, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -160,7 +160,7 @@ void tipc_subscr_report_overlap(struct subscription *sub, static void subscr_timeout(struct subscription *sub) { - struct port *server_port; + struct tipc_port *server_port; /* Validate server port reference (in case subscriber is terminating) */ @@ -508,7 +508,7 @@ static void subscr_named_msg_event(void *usr_handle, /* Lock server port (& save lock address for future use) */ - subscriber->lock = tipc_port_lock(subscriber->port_ref)->publ.lock; + subscriber->lock = tipc_port_lock(subscriber->port_ref)->lock; /* Add subscriber to topology server's subscriber list */ -- cgit v1.1 From 2d627b92fd1e39d83c3ee0b9d410403f98cb3981 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Fri, 7 Jan 2011 13:00:11 -0500 Subject: tipc: Combine bearer structure with tipc_bearer structure Combines two distinct structures containing information about a TIPC bearer into a single structure. The structures were previously kept separate so that public information about a bearer could be made available to plug-in media types using TIPC's native API, while the remaining information was kept private for use by TIPC itself. However, now that the native API has been removed there is no longer any need for this arrangement. Since one of the structures was already embedded within the other, the change largely involves replacing instances of "publ.foo" with "foo". The changes do not otherwise alter the operation of TIPC bearers. 
Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/bcast.c | 28 +++++++-------- net/tipc/bearer.c | 98 ++++++++++++++++++++++++++--------------------------- net/tipc/bearer.h | 67 +++++++++++++++++------------------- net/tipc/discover.c | 26 +++++++------- net/tipc/discover.h | 6 ++-- net/tipc/link.c | 13 ++++--- net/tipc/link.h | 4 +-- net/tipc/node.c | 4 +-- 8 files changed, 119 insertions(+), 127 deletions(-) (limited to 'net') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 70ab5ef..b4d659d 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -3,7 +3,7 @@ * * Copyright (c) 2004-2006, Ericsson AB * Copyright (c) 2004, Intel Corporation. - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -61,8 +61,8 @@ */ struct bcbearer_pair { - struct bearer *primary; - struct bearer *secondary; + struct tipc_bearer *primary; + struct tipc_bearer *secondary; }; /** @@ -81,7 +81,7 @@ struct bcbearer_pair { */ struct bcbearer { - struct bearer bearer; + struct tipc_bearer bearer; struct media media; struct bcbearer_pair bpairs[MAX_BEARERS]; struct bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1]; @@ -574,8 +574,8 @@ static int tipc_bcbearer_send(struct sk_buff *buf, bcbearer->remains = tipc_bcast_nmap; for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) { - struct bearer *p = bcbearer->bpairs[bp_index].primary; - struct bearer *s = bcbearer->bpairs[bp_index].secondary; + struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary; + struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary; if (!p) break; /* no more bearers to try */ @@ -584,11 +584,11 @@ static int tipc_bcbearer_send(struct sk_buff *buf, if (bcbearer->remains_new.count == bcbearer->remains.count) continue; /* bearer pair doesn't add anything */ - if (p->publ.blocked || - p->media->send_msg(buf, &p->publ, &p->media->bcast_addr)) { + if 
(p->blocked || + p->media->send_msg(buf, p, &p->media->bcast_addr)) { /* unable to send on primary bearer */ - if (!s || s->publ.blocked || - s->media->send_msg(buf, &s->publ, + if (!s || s->blocked || + s->media->send_msg(buf, s, &s->media->bcast_addr)) { /* unable to send on either bearer */ continue; @@ -633,7 +633,7 @@ void tipc_bcbearer_sort(void) memset(bp_temp, 0, sizeof(bcbearer->bpairs_temp)); for (b_index = 0; b_index < MAX_BEARERS; b_index++) { - struct bearer *b = &tipc_bearers[b_index]; + struct tipc_bearer *b = &tipc_bearers[b_index]; if (!b->active || !b->nodes.count) continue; @@ -682,12 +682,12 @@ void tipc_bcbearer_sort(void) void tipc_bcbearer_push(void) { - struct bearer *b_ptr; + struct tipc_bearer *b_ptr; spin_lock_bh(&bc_lock); b_ptr = &bcbearer->bearer; - if (b_ptr->publ.blocked) { - b_ptr->publ.blocked = 0; + if (b_ptr->blocked) { + b_ptr->blocked = 0; tipc_bearer_lock_push(b_ptr); } spin_unlock_bh(&bc_lock); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 837b7a4..9e2ff0e 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -2,7 +2,7 @@ * net/tipc/bearer.c: TIPC bearer code * * Copyright (c) 1996-2006, Ericsson AB - * Copyright (c) 2004-2006, Wind River Systems + * Copyright (c) 2004-2006, 2010-2011, Wind River Systems * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -44,7 +44,7 @@ static struct media media_list[MAX_MEDIA]; static u32 media_count; -struct bearer tipc_bearers[MAX_BEARERS]; +struct tipc_bearer tipc_bearers[MAX_BEARERS]; /** * media_name_valid - validate media name @@ -278,13 +278,13 @@ static int bearer_name_validate(const char *name, * bearer_find - locates bearer object with matching bearer name */ -static struct bearer *bearer_find(const char *name) +static struct tipc_bearer *bearer_find(const char *name) { - struct bearer *b_ptr; + struct tipc_bearer *b_ptr; u32 i; for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) { - if (b_ptr->active && (!strcmp(b_ptr->publ.name, name))) + if (b_ptr->active && (!strcmp(b_ptr->name, name))) return b_ptr; } return NULL; @@ -294,16 +294,16 @@ static struct bearer *bearer_find(const char *name) * tipc_bearer_find_interface - locates bearer object with matching interface name */ -struct bearer *tipc_bearer_find_interface(const char *if_name) +struct tipc_bearer *tipc_bearer_find_interface(const char *if_name) { - struct bearer *b_ptr; + struct tipc_bearer *b_ptr; char *b_if_name; u32 i; for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) { if (!b_ptr->active) continue; - b_if_name = strchr(b_ptr->publ.name, ':') + 1; + b_if_name = strchr(b_ptr->name, ':') + 1; if (!strcmp(b_if_name, if_name)) return b_ptr; } @@ -318,7 +318,7 @@ struct sk_buff *tipc_bearer_get_names(void) { struct sk_buff *buf; struct media *m_ptr; - struct bearer *b_ptr; + struct tipc_bearer *b_ptr; int i, j; buf = tipc_cfg_reply_alloc(MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME)); @@ -331,8 +331,8 @@ struct sk_buff *tipc_bearer_get_names(void) b_ptr = &tipc_bearers[j]; if (b_ptr->active && (b_ptr->media == m_ptr)) { tipc_cfg_append_tlv(buf, TIPC_TLV_BEARER_NAME, - b_ptr->publ.name, - strlen(b_ptr->publ.name) + 1); + b_ptr->name, + strlen(b_ptr->name) + 1); } } } @@ -340,14 +340,14 @@ struct sk_buff 
*tipc_bearer_get_names(void) return buf; } -void tipc_bearer_add_dest(struct bearer *b_ptr, u32 dest) +void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest) { tipc_nmap_add(&b_ptr->nodes, dest); tipc_disc_update_link_req(b_ptr->link_req); tipc_bcbearer_sort(); } -void tipc_bearer_remove_dest(struct bearer *b_ptr, u32 dest) +void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest) { tipc_nmap_remove(&b_ptr->nodes, dest); tipc_disc_update_link_req(b_ptr->link_req); @@ -362,12 +362,12 @@ void tipc_bearer_remove_dest(struct bearer *b_ptr, u32 dest) * bearer.lock must be taken before calling * Returns binary true(1) ore false(0) */ -static int bearer_push(struct bearer *b_ptr) +static int bearer_push(struct tipc_bearer *b_ptr) { u32 res = 0; struct link *ln, *tln; - if (b_ptr->publ.blocked) + if (b_ptr->blocked) return 0; while (!list_empty(&b_ptr->cong_links) && (res != PUSH_FAILED)) { @@ -382,13 +382,13 @@ static int bearer_push(struct bearer *b_ptr) return list_empty(&b_ptr->cong_links); } -void tipc_bearer_lock_push(struct bearer *b_ptr) +void tipc_bearer_lock_push(struct tipc_bearer *b_ptr) { int res; - spin_lock_bh(&b_ptr->publ.lock); + spin_lock_bh(&b_ptr->lock); res = bearer_push(b_ptr); - spin_unlock_bh(&b_ptr->publ.lock); + spin_unlock_bh(&b_ptr->lock); if (res) tipc_bcbearer_push(); } @@ -398,16 +398,14 @@ void tipc_bearer_lock_push(struct bearer *b_ptr) * Interrupt enabling new requests after bearer congestion or blocking: * See bearer_send(). 
*/ -void tipc_continue(struct tipc_bearer *tb_ptr) +void tipc_continue(struct tipc_bearer *b_ptr) { - struct bearer *b_ptr = (struct bearer *)tb_ptr; - - spin_lock_bh(&b_ptr->publ.lock); + spin_lock_bh(&b_ptr->lock); b_ptr->continue_count++; if (!list_empty(&b_ptr->cong_links)) tipc_k_signal((Handler)tipc_bearer_lock_push, (unsigned long)b_ptr); - b_ptr->publ.blocked = 0; - spin_unlock_bh(&b_ptr->publ.lock); + b_ptr->blocked = 0; + spin_unlock_bh(&b_ptr->lock); } /* @@ -418,7 +416,7 @@ void tipc_continue(struct tipc_bearer *tb_ptr) * bearer.lock is busy */ -static void tipc_bearer_schedule_unlocked(struct bearer *b_ptr, struct link *l_ptr) +static void tipc_bearer_schedule_unlocked(struct tipc_bearer *b_ptr, struct link *l_ptr) { list_move_tail(&l_ptr->link_list, &b_ptr->cong_links); } @@ -431,11 +429,11 @@ static void tipc_bearer_schedule_unlocked(struct bearer *b_ptr, struct link *l_p * bearer.lock is free */ -void tipc_bearer_schedule(struct bearer *b_ptr, struct link *l_ptr) +void tipc_bearer_schedule(struct tipc_bearer *b_ptr, struct link *l_ptr) { - spin_lock_bh(&b_ptr->publ.lock); + spin_lock_bh(&b_ptr->lock); tipc_bearer_schedule_unlocked(b_ptr, l_ptr); - spin_unlock_bh(&b_ptr->publ.lock); + spin_unlock_bh(&b_ptr->lock); } @@ -444,18 +442,18 @@ void tipc_bearer_schedule(struct bearer *b_ptr, struct link *l_ptr) * and if there is, try to resolve it before returning. 
* 'tipc_net_lock' is read_locked when this function is called */ -int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr) +int tipc_bearer_resolve_congestion(struct tipc_bearer *b_ptr, struct link *l_ptr) { int res = 1; if (list_empty(&b_ptr->cong_links)) return 1; - spin_lock_bh(&b_ptr->publ.lock); + spin_lock_bh(&b_ptr->lock); if (!bearer_push(b_ptr)) { tipc_bearer_schedule_unlocked(b_ptr, l_ptr); res = 0; } - spin_unlock_bh(&b_ptr->publ.lock); + spin_unlock_bh(&b_ptr->lock); return res; } @@ -463,9 +461,9 @@ int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr) * tipc_bearer_congested - determines if bearer is currently congested */ -int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr) +int tipc_bearer_congested(struct tipc_bearer *b_ptr, struct link *l_ptr) { - if (unlikely(b_ptr->publ.blocked)) + if (unlikely(b_ptr->blocked)) return 1; if (likely(list_empty(&b_ptr->cong_links))) return 0; @@ -478,7 +476,7 @@ int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr) int tipc_enable_bearer(const char *name, u32 bcast_scope, u32 priority) { - struct bearer *b_ptr; + struct tipc_bearer *b_ptr; struct media *m_ptr; struct bearer_name b_name; char addr_string[16]; @@ -528,7 +526,7 @@ restart: bearer_id = i; continue; } - if (!strcmp(name, tipc_bearers[i].publ.name)) { + if (!strcmp(name, tipc_bearers[i].name)) { warn("Bearer <%s> rejected, already enabled\n", name); goto failed; } @@ -551,8 +549,8 @@ restart: } b_ptr = &tipc_bearers[bearer_id]; - strcpy(b_ptr->publ.name, name); - res = m_ptr->enable_bearer(&b_ptr->publ); + strcpy(b_ptr->name, name); + res = m_ptr->enable_bearer(b_ptr); if (res) { warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res); goto failed; @@ -570,7 +568,7 @@ restart: b_ptr->link_req = tipc_disc_init_link_req(b_ptr, &m_ptr->bcast_addr, bcast_scope, 2); } - spin_lock_init(&b_ptr->publ.lock); + spin_lock_init(&b_ptr->lock); write_unlock_bh(&tipc_net_lock); 
info("Enabled bearer <%s>, discovery domain %s, priority %u\n", name, tipc_addr_string_fill(addr_string, bcast_scope), priority); @@ -587,7 +585,7 @@ failed: int tipc_block_bearer(const char *name) { - struct bearer *b_ptr = NULL; + struct tipc_bearer *b_ptr = NULL; struct link *l_ptr; struct link *temp_l_ptr; @@ -600,8 +598,8 @@ int tipc_block_bearer(const char *name) } info("Blocking bearer <%s>\n", name); - spin_lock_bh(&b_ptr->publ.lock); - b_ptr->publ.blocked = 1; + spin_lock_bh(&b_ptr->lock); + b_ptr->blocked = 1; list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { struct tipc_node *n_ptr = l_ptr->owner; @@ -609,7 +607,7 @@ int tipc_block_bearer(const char *name) tipc_link_reset(l_ptr); spin_unlock_bh(&n_ptr->lock); } - spin_unlock_bh(&b_ptr->publ.lock); + spin_unlock_bh(&b_ptr->lock); read_unlock_bh(&tipc_net_lock); return 0; } @@ -620,27 +618,27 @@ int tipc_block_bearer(const char *name) * Note: This routine assumes caller holds tipc_net_lock. */ -static void bearer_disable(struct bearer *b_ptr) +static void bearer_disable(struct tipc_bearer *b_ptr) { struct link *l_ptr; struct link *temp_l_ptr; - info("Disabling bearer <%s>\n", b_ptr->publ.name); + info("Disabling bearer <%s>\n", b_ptr->name); tipc_disc_stop_link_req(b_ptr->link_req); - spin_lock_bh(&b_ptr->publ.lock); + spin_lock_bh(&b_ptr->lock); b_ptr->link_req = NULL; - b_ptr->publ.blocked = 1; - b_ptr->media->disable_bearer(&b_ptr->publ); + b_ptr->blocked = 1; + b_ptr->media->disable_bearer(b_ptr); list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { tipc_link_delete(l_ptr); } - spin_unlock_bh(&b_ptr->publ.lock); - memset(b_ptr, 0, sizeof(struct bearer)); + spin_unlock_bh(&b_ptr->lock); + memset(b_ptr, 0, sizeof(struct tipc_bearer)); } int tipc_disable_bearer(const char *name) { - struct bearer *b_ptr; + struct tipc_bearer *b_ptr; int res; write_lock_bh(&tipc_net_lock); diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 85f451d..255dea6 100644 --- 
a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -2,7 +2,7 @@ * net/tipc/bearer.h: Include file for TIPC bearer code * * Copyright (c) 1996-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -61,26 +61,7 @@ struct tipc_media_addr { } dev_addr; }; -/** - * struct tipc_bearer - TIPC bearer info available to media code - * @usr_handle: pointer to additional media-specific information about bearer - * @mtu: max packet size bearer can support - * @blocked: non-zero if bearer is blocked - * @lock: spinlock for controlling access to bearer - * @addr: media-specific address associated with bearer - * @name: bearer name (format = media:interface) - * - * Note: TIPC initializes "name" and "lock" fields; media code is responsible - * for initialization all other fields when a bearer is enabled. - */ -struct tipc_bearer { - void *usr_handle; - u32 mtu; - int blocked; - spinlock_t lock; - struct tipc_media_addr addr; - char name[TIPC_MAX_BEARER_NAME]; -}; +struct tipc_bearer; /** * struct media - TIPC media information available to internal users @@ -115,8 +96,13 @@ struct media { }; /** - * struct bearer - TIPC bearer information available to internal users - * @publ: bearer information available to privileged users + * struct tipc_bearer - TIPC bearer structure + * @usr_handle: pointer to additional media-specific information about bearer + * @mtu: max packet size bearer can support + * @blocked: non-zero if bearer is blocked + * @lock: spinlock for controlling access to bearer + * @addr: media-specific address associated with bearer + * @name: bearer name (format = media:interface) * @media: ptr to media structure associated with bearer * @priority: default link priority for bearer * @detect_scope: network address mask used during automatic link creation @@ -128,10 +114,18 @@ struct media { * @active: non-zero if 
bearer structure is represents a bearer * @net_plane: network plane ('A' through 'H') currently associated with bearer * @nodes: indicates which nodes in cluster can be reached through bearer + * + * Note: media-specific code is responsible for initialization of the fields + * indicated below when a bearer is enabled; TIPC's generic bearer code takes + * care of initializing all other fields. */ - -struct bearer { - struct tipc_bearer publ; +struct tipc_bearer { + void *usr_handle; /* initalized by media */ + u32 mtu; /* initalized by media */ + int blocked; /* initalized by media */ + struct tipc_media_addr addr; /* initalized by media */ + char name[TIPC_MAX_BEARER_NAME]; + spinlock_t lock; struct media *media; u32 priority; u32 detect_scope; @@ -152,7 +146,7 @@ struct bearer_name { struct link; -extern struct bearer tipc_bearers[]; +extern struct tipc_bearer tipc_bearers[]; /* * TIPC routines available to supported media types @@ -186,14 +180,14 @@ void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a); struct sk_buff *tipc_media_get_names(void); struct sk_buff *tipc_bearer_get_names(void); -void tipc_bearer_add_dest(struct bearer *b_ptr, u32 dest); -void tipc_bearer_remove_dest(struct bearer *b_ptr, u32 dest); -void tipc_bearer_schedule(struct bearer *b_ptr, struct link *l_ptr); -struct bearer *tipc_bearer_find_interface(const char *if_name); -int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr); -int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr); +void tipc_bearer_add_dest(struct tipc_bearer *b_ptr, u32 dest); +void tipc_bearer_remove_dest(struct tipc_bearer *b_ptr, u32 dest); +void tipc_bearer_schedule(struct tipc_bearer *b_ptr, struct link *l_ptr); +struct tipc_bearer *tipc_bearer_find_interface(const char *if_name); +int tipc_bearer_resolve_congestion(struct tipc_bearer *b_ptr, struct link *l_ptr); +int tipc_bearer_congested(struct tipc_bearer *b_ptr, struct link *l_ptr); void 
tipc_bearer_stop(void); -void tipc_bearer_lock_push(struct bearer *b_ptr); +void tipc_bearer_lock_push(struct tipc_bearer *b_ptr); /** @@ -214,10 +208,11 @@ void tipc_bearer_lock_push(struct bearer *b_ptr); * and let TIPC's link code deal with the undelivered message. */ -static inline int tipc_bearer_send(struct bearer *b_ptr, struct sk_buff *buf, +static inline int tipc_bearer_send(struct tipc_bearer *b_ptr, + struct sk_buff *buf, struct tipc_media_addr *dest) { - return !b_ptr->media->send_msg(buf, &b_ptr->publ, dest); + return !b_ptr->media->send_msg(buf, b_ptr, dest); } #endif /* _TIPC_BEARER_H */ diff --git a/net/tipc/discover.c b/net/tipc/discover.c index fa026bd..59a86fc 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -2,7 +2,7 @@ * net/tipc/discover.c * * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005-2006, Wind River Systems + * Copyright (c) 2005-2006, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -57,7 +57,7 @@ * @timer_intv: current interval between requests (in ms) */ struct link_req { - struct bearer *bearer; + struct tipc_bearer *bearer; struct tipc_media_addr dest; struct sk_buff *buf; struct timer_list timer; @@ -75,7 +75,7 @@ struct link_req { static struct sk_buff *tipc_disc_init_msg(u32 type, u32 req_links, u32 dest_domain, - struct bearer *b_ptr) + struct tipc_bearer *b_ptr) { struct sk_buff *buf = tipc_buf_acquire(DSC_H_SIZE); struct tipc_msg *msg; @@ -87,7 +87,7 @@ static struct sk_buff *tipc_disc_init_msg(u32 type, msg_set_req_links(msg, req_links); msg_set_dest_domain(msg, dest_domain); msg_set_bc_netid(msg, tipc_net_id); - msg_set_media_addr(msg, &b_ptr->publ.addr); + msg_set_media_addr(msg, &b_ptr->addr); } return buf; } @@ -99,7 +99,7 @@ static struct sk_buff *tipc_disc_init_msg(u32 type, * @media_addr: media address advertised by duplicated node */ -static void disc_dupl_alert(struct bearer *b_ptr, u32 node_addr, +static void 
disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr, struct tipc_media_addr *media_addr) { char node_addr_str[16]; @@ -111,7 +111,7 @@ static void disc_dupl_alert(struct bearer *b_ptr, u32 node_addr, tipc_media_addr_printf(&pb, media_addr); tipc_printbuf_validate(&pb); warn("Duplicate %s using %s seen on <%s>\n", - node_addr_str, media_addr_str, b_ptr->publ.name); + node_addr_str, media_addr_str, b_ptr->name); } /** @@ -120,7 +120,7 @@ static void disc_dupl_alert(struct bearer *b_ptr, u32 node_addr, * @b_ptr: bearer that message arrived on */ -void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr) +void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr) { struct link *link; struct tipc_media_addr media_addr; @@ -140,7 +140,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr) if (!tipc_addr_node_valid(orig)) return; if (orig == tipc_own_addr) { - if (memcmp(&media_addr, &b_ptr->publ.addr, sizeof(media_addr))) + if (memcmp(&media_addr, &b_ptr->addr, sizeof(media_addr))) disc_dupl_alert(b_ptr, tipc_own_addr, &media_addr); return; } @@ -193,7 +193,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr) return; rbuf = tipc_disc_init_msg(DSC_RESP_MSG, 1, orig, b_ptr); if (rbuf != NULL) { - b_ptr->media->send_msg(rbuf, &b_ptr->publ, &media_addr); + b_ptr->media->send_msg(rbuf, b_ptr, &media_addr); buf_discard(rbuf); } } @@ -249,9 +249,9 @@ void tipc_disc_update_link_req(struct link_req *req) static void disc_timeout(struct link_req *req) { - spin_lock_bh(&req->bearer->publ.lock); + spin_lock_bh(&req->bearer->lock); - req->bearer->media->send_msg(req->buf, &req->bearer->publ, &req->dest); + req->bearer->media->send_msg(req->buf, req->bearer, &req->dest); if ((req->timer_intv == TIPC_LINK_REQ_SLOW) || (req->timer_intv == TIPC_LINK_REQ_FAST)) { @@ -266,7 +266,7 @@ static void disc_timeout(struct link_req *req) } k_start_timer(&req->timer, req->timer_intv); - spin_unlock_bh(&req->bearer->publ.lock); + 
spin_unlock_bh(&req->bearer->lock); } /** @@ -279,7 +279,7 @@ static void disc_timeout(struct link_req *req) * Returns pointer to link request structure, or NULL if unable to create. */ -struct link_req *tipc_disc_init_link_req(struct bearer *b_ptr, +struct link_req *tipc_disc_init_link_req(struct tipc_bearer *b_ptr, const struct tipc_media_addr *dest, u32 dest_domain, u32 req_links) diff --git a/net/tipc/discover.h b/net/tipc/discover.h index d2c3cff..4046d77 100644 --- a/net/tipc/discover.h +++ b/net/tipc/discover.h @@ -2,7 +2,7 @@ * net/tipc/discover.h * * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -39,13 +39,13 @@ struct link_req; -struct link_req *tipc_disc_init_link_req(struct bearer *b_ptr, +struct link_req *tipc_disc_init_link_req(struct tipc_bearer *b_ptr, const struct tipc_media_addr *dest, u32 dest_domain, u32 req_links); void tipc_disc_update_link_req(struct link_req *req); void tipc_disc_stop_link_req(struct link_req *req); -void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr); +void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr); #endif diff --git a/net/tipc/link.c b/net/tipc/link.c index e30770d..1c5c53a 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -113,7 +113,7 @@ static void link_init_max_pkt(struct link *l_ptr) { u32 max_pkt; - max_pkt = (l_ptr->b_ptr->publ.mtu & ~3); + max_pkt = (l_ptr->b_ptr->mtu & ~3); if (max_pkt > MAX_MSG_SIZE) max_pkt = MAX_MSG_SIZE; @@ -303,7 +303,7 @@ static void link_set_timer(struct link *l_ptr, u32 time) * Returns pointer to link. 
*/ -struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer, +struct link *tipc_link_create(struct tipc_bearer *b_ptr, const u32 peer, const struct tipc_media_addr *media_addr) { struct link *l_ptr; @@ -317,7 +317,7 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer, } l_ptr->addr = peer; - if_name = strchr(b_ptr->publ.name, ':') + 1; + if_name = strchr(b_ptr->name, ':') + 1; sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:", tipc_zone(tipc_own_addr), tipc_cluster(tipc_own_addr), tipc_node(tipc_own_addr), @@ -1595,11 +1595,10 @@ static int link_recv_buf_validate(struct sk_buff *buf) * structure (i.e. cannot be NULL), but bearer can be inactive. */ -void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr) +void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *b_ptr) { read_lock_bh(&tipc_net_lock); while (head) { - struct bearer *b_ptr = (struct bearer *)tb_ptr; struct tipc_node *n_ptr; struct link *l_ptr; struct sk_buff *crs; @@ -2658,7 +2657,7 @@ void tipc_link_set_queue_limits(struct link *l_ptr, u32 window) static struct link *link_find_link(const char *name, struct tipc_node **node) { struct link_name link_name_parts; - struct bearer *b_ptr; + struct tipc_bearer *b_ptr; struct link *l_ptr; if (!link_name_validate(name, &link_name_parts)) @@ -2961,7 +2960,7 @@ static void link_print(struct link *l_ptr, const char *str) tipc_printf(buf, str); tipc_printf(buf, "Link %x<%s>:", - l_ptr->addr, l_ptr->b_ptr->publ.name); + l_ptr->addr, l_ptr->b_ptr->name); #ifdef CONFIG_TIPC_DEBUG if (link_reset_reset(l_ptr) || link_reset_unknown(l_ptr)) diff --git a/net/tipc/link.h b/net/tipc/link.h index 85fd3bc..bdb0fa2 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -122,7 +122,7 @@ struct link { u32 checkpoint; u32 peer_session; u32 peer_bearer_id; - struct bearer *b_ptr; + struct tipc_bearer *b_ptr; u32 tolerance; u32 continuity_interval; u32 abort_limit; @@ -213,7 +213,7 @@ struct link { struct tipc_port; -struct link 
*tipc_link_create(struct bearer *b_ptr, const u32 peer, +struct link *tipc_link_create(struct tipc_bearer *b_ptr, const u32 peer, const struct tipc_media_addr *media_addr); void tipc_link_delete(struct link *l_ptr); void tipc_link_changeover(struct link *l_ptr); diff --git a/net/tipc/node.c b/net/tipc/node.c index 3af53e3..e4dba1d 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2,7 +2,7 @@ * net/tipc/node.c: TIPC node management routines * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005-2006, Wind River Systems + * Copyright (c) 2005-2006, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -238,7 +238,7 @@ struct tipc_node *tipc_node_attach_link(struct link *l_ptr) return n_ptr; } err("Attempt to establish second link on <%s> to %s\n", - l_ptr->b_ptr->publ.name, + l_ptr->b_ptr->name, tipc_addr_string_fill(addr_string, l_ptr->addr)); } return NULL; -- cgit v1.1 From 4132facae1df653b5a78e0e32956218199026812 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Fri, 7 Jan 2011 13:12:12 -0500 Subject: tipc: Remove unused global variable tipc_user_count Eliminates a global variable that was previously used by TIPC's user registry to track the number of distinct applications using TIPC. Due to the recent elimination of the user registry this variable no longer serves any purpose and can be removed. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/core.c | 3 +-- net/tipc/core.h | 3 +-- net/tipc/socket.c | 4 +--- 3 files changed, 3 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/tipc/core.c b/net/tipc/core.c index e071579..2da1fc7 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -2,7 +2,7 @@ * net/tipc/core.c: TIPC module code * * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005-2006, Wind River Systems + * Copyright (c) 2005-2006, 2010-2011, Wind River Systems * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -57,7 +57,6 @@ int tipc_mode = TIPC_NOT_RUNNING; int tipc_random; -atomic_t tipc_user_count = ATOMIC_INIT(0); const char tipc_alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_."; diff --git a/net/tipc/core.h b/net/tipc/core.h index 9971585..37544d9 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -2,7 +2,7 @@ * net/tipc/core.h: Include file for TIPC global declarations * * Copyright (c) 2005-2006, Ericsson AB - * Copyright (c) 2005-2007, Wind River Systems + * Copyright (c) 2005-2007, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -161,7 +161,6 @@ extern int tipc_remote_management; extern int tipc_mode; extern int tipc_random; extern const char tipc_alphabet[]; -extern atomic_t tipc_user_count; /* diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 2b02a3a..893ca6e 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2,7 +2,7 @@ * net/tipc/socket.c: TIPC socket API * * Copyright (c) 2001-2007, Ericsson AB - * Copyright (c) 2004-2008, Wind River Systems + * Copyright (c) 2004-2008, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -241,7 +241,6 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol, tipc_set_portunreliable(tp_ptr->ref, 1); } - atomic_inc(&tipc_user_count); return 0; } @@ -321,7 +320,6 @@ static int release(struct socket *sock) sock_put(sk); sock->sk = NULL; - atomic_dec(&tipc_user_count); return res; } -- cgit v1.1 From 3f8dd9446e66f2a982ddcff38e4705cfe93eeec6 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 18 Jan 2011 13:09:29 -0500 Subject: tipc: Prevent invalid memory access when sending to configuration service Reject TIPC configuration service messages without a full message header. 
Previously, an application that sent a message to the configuration service that was too short could cause the validation code to access an uninitialized field in the msghdr structure, resulting in a memory access exception. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/socket.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 893ca6e..125dcb0 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -493,6 +493,8 @@ static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m) if (likely(dest->addr.name.name.type != TIPC_CFG_SRV)) return -EACCES; + if (!m->msg_iovlen || (m->msg_iov[0].iov_len < sizeof(hdr))) + return -EMSGSIZE; if (copy_from_user(&hdr, m->msg_iov[0].iov_base, sizeof(hdr))) return -EFAULT; if ((ntohs(hdr.tcm_type) & 0xC000) && (!capable(CAP_NET_ADMIN))) -- cgit v1.1 From 5413b4c6c07b659e52c84a4e40d897b32b89834f Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 18 Jan 2011 13:24:55 -0500 Subject: tipc: Improve handling of invalid link tolerance values Enhances TIPC link code to ignore an invalid link tolerance value contained in an incoming LINK_PROTOCOL message, rather than processing the value and potentially causing a divide-by-zero error. Also add a compile-time check that catches attempts to redefine TIPC's minimum link tolerance value in a manner that might result in the same divide-by-zero error at run-time. 
Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/link.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index 1c5c53a..3c1c28c 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2617,6 +2617,9 @@ static void link_check_defragm_bufs(struct link *l_ptr) static void link_set_supervision_props(struct link *l_ptr, u32 tolerance) { + if ((tolerance < TIPC_MIN_LINK_TOL) || (tolerance > TIPC_MAX_LINK_TOL)) + return; + l_ptr->tolerance = tolerance; l_ptr->continuity_interval = ((tolerance / 4) > 500) ? 500 : tolerance / 4; -- cgit v1.1 From c8a61b52ebac3645b4e3c5b03c2073e6c8c119a8 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 18 Jan 2011 13:31:32 -0500 Subject: tipc: Fix print statements that assume pointers are 32-bit values Corrects print statements that use %x to print pointer values to use %p instead, so that 64-bit pointer values are displayed correctly. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/link.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index 3c1c28c..d586265 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2983,9 +2983,9 @@ static void link_print(struct link *l_ptr, const char *str) != (l_ptr->out_queue_size - 1)) || (l_ptr->last_out->next != NULL)) { tipc_printf(buf, "\nSend queue inconsistency\n"); - tipc_printf(buf, "first_out= %x ", l_ptr->first_out); - tipc_printf(buf, "next_out= %x ", l_ptr->next_out); - tipc_printf(buf, "last_out= %x ", l_ptr->last_out); + tipc_printf(buf, "first_out= %p ", l_ptr->first_out); + tipc_printf(buf, "next_out= %p ", l_ptr->next_out); + tipc_printf(buf, "last_out= %p ", l_ptr->last_out); } } else tipc_printf(buf, "[]"); -- cgit v1.1 From 01d83eddc55c138cbb24a5917d5271c0b24956a1 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 18 Jan 2011 13:53:16 -0500 Subject: tipc: Clean up tracking of node requesting a 
broadcast retransmit Allows the broadcast link to track the node that is requesting a retransmit in a new field dedicated to that purpose. This replaces the existing mechanism that (ab)uses an existing node structure linked list field to do the tracking. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/bcast.c | 16 ++++++++++++++-- net/tipc/bcast.h | 3 ++- net/tipc/link.c | 2 +- 3 files changed, 17 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index b4d659d..a5eb7db 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -93,6 +93,7 @@ struct bcbearer { * struct bclink - link used for broadcast messages * @link: (non-standard) broadcast link structure * @node: (non-standard) node structure representing b'cast link's peer node + * @retransmit_to: node that most recently requested a retransmit * * Handles sequence numbering, fragmentation, bundling, etc. */ @@ -100,6 +101,7 @@ struct bcbearer { struct bclink { struct link link; struct tipc_node node; + struct tipc_node *retransmit_to; }; @@ -184,6 +186,17 @@ static int bclink_ack_allowed(u32 n) /** + * tipc_bclink_retransmit_to - get most recent node to request retransmission + * + * Called with bc_lock locked + */ + +struct tipc_node *tipc_bclink_retransmit_to(void) +{ + return bclink->retransmit_to; +} + +/** * bclink_retransmit_pkt - retransmit broadcast packets * @after: sequence number of last packet to *not* retransmit * @to: sequence number of last packet to retransmit @@ -444,10 +457,9 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf) tipc_node_unlock(node); spin_lock_bh(&bc_lock); bcl->stats.recv_nacks++; - bcl->owner->next = node; /* remember requestor */ + bclink->retransmit_to = node; bclink_retransmit_pkt(msg_bcgap_after(msg), msg_bcgap_to(msg)); - bcl->owner->next = NULL; spin_unlock_bh(&bc_lock); } else { tipc_bclink_peek_nack(msg_destnode(msg), diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 51f8c53..500c97f 100644 --- 
a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -2,7 +2,7 @@ * net/tipc/bcast.h: Include file for TIPC broadcast code * * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -90,6 +90,7 @@ void tipc_port_list_free(struct port_list *pl_ptr); int tipc_bclink_init(void); void tipc_bclink_stop(void); +struct tipc_node *tipc_bclink_retransmit_to(void); void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked); int tipc_bclink_send_msg(struct sk_buff *buf); void tipc_bclink_recv_pkt(struct sk_buff *buf); diff --git a/net/tipc/link.c b/net/tipc/link.c index d586265..0cb773b 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1441,7 +1441,7 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf) info("Outstanding acks: %lu\n", (unsigned long) TIPC_SKB_CB(buf)->handle); - n_ptr = l_ptr->owner->next; + n_ptr = tipc_bclink_retransmit_to(); tipc_node_lock(n_ptr); tipc_addr_string_fill(addr_string, n_ptr->addr); -- cgit v1.1 From 9f54b545bd62a42ec354727d90eacadc5846406b Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 18 Jan 2011 13:58:27 -0500 Subject: tipc: Eliminate unnecessary locking when starting topology service Modifies the initialization code for TIPC's topology service to avoid taking the spinlock protecting the subscriber list, since there is no need to do this. 
Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/subscr.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 98ee50b..1387372 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -542,7 +542,6 @@ int tipc_subscr_start(void) spin_lock_init(&topsrv.lock); INIT_LIST_HEAD(&topsrv.subscriber_list); - spin_lock_bh(&topsrv.lock); res = tipc_createport(NULL, TIPC_CRITICAL_IMPORTANCE, NULL, @@ -563,12 +562,10 @@ int tipc_subscr_start(void) goto failed; } - spin_unlock_bh(&topsrv.lock); return 0; failed: err("Failed to create subscription service\n"); - spin_unlock_bh(&topsrv.lock); return res; } -- cgit v1.1 From 9bd80b60827fe8d84c0e594895acb8a44f2b98b1 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 18 Jan 2011 15:02:50 -0500 Subject: tipc: Improve accuracy of link transmit queue maximum size statistic Enhances TIPC's unicast and broadcast link code to update the transmit queue maximum size counter in a single place, namely the routine that adds messages to the queue. This ensures that the maximum size statistic reported for unicast links is completely accurate, rather than being partially based on statistical sampling. The changes to link.h are just documenting the roles of the variables. 
Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/bcast.c | 2 -- net/tipc/link.c | 9 +++------ net/tipc/link.h | 18 ++++++------------ 3 files changed, 9 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index a5eb7db..63df42b 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -418,8 +418,6 @@ int tipc_bclink_send_msg(struct sk_buff *buf) else bclink_set_last_sent(); - if (bcl->out_queue_size > bcl->stats.max_queue_sz) - bcl->stats.max_queue_sz = bcl->out_queue_size; bcl->stats.queue_sz_counts++; bcl->stats.accu_queue_sz += bcl->out_queue_size; diff --git a/net/tipc/link.c b/net/tipc/link.c index 0cb773b..d1818fb 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -246,9 +246,6 @@ static void link_timeout(struct link *l_ptr) l_ptr->stats.accu_queue_sz += l_ptr->out_queue_size; l_ptr->stats.queue_sz_counts++; - if (l_ptr->out_queue_size > l_ptr->stats.max_queue_sz) - l_ptr->stats.max_queue_sz = l_ptr->out_queue_size; - if (l_ptr->first_out) { struct tipc_msg *msg = buf_msg(l_ptr->first_out); u32 length = msg_size(msg); @@ -824,7 +821,10 @@ static void link_add_to_outqueue(struct link *l_ptr, l_ptr->last_out = buf; } else l_ptr->first_out = l_ptr->last_out = buf; + l_ptr->out_queue_size++; + if (l_ptr->out_queue_size > l_ptr->stats.max_queue_sz) + l_ptr->stats.max_queue_sz = l_ptr->out_queue_size; } /* @@ -867,9 +867,6 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf) /* Packet can be queued or sent: */ - if (queue_size > l_ptr->stats.max_queue_sz) - l_ptr->stats.max_queue_sz = queue_size; - if (likely(!tipc_bearer_congested(l_ptr->b_ptr, l_ptr) && !link_congested(l_ptr))) { link_add_to_outqueue(l_ptr, buf, msg); diff --git a/net/tipc/link.h b/net/tipc/link.h index bdb0fa2..a7794e7 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -196,18 +196,12 @@ struct link { u32 bearer_congs; u32 deferred_recv; u32 duplicates; - - /* for statistical profiling of send queue size */ 
- - u32 max_queue_sz; - u32 accu_queue_sz; - u32 queue_sz_counts; - - /* for statistical profiling of message lengths */ - - u32 msg_length_counts; - u32 msg_lengths_total; - u32 msg_length_profile[7]; + u32 max_queue_sz; /* send queue size high water mark */ + u32 accu_queue_sz; /* used for send queue size profiling */ + u32 queue_sz_counts; /* used for send queue size profiling */ + u32 msg_length_counts; /* used for message length profiling */ + u32 msg_lengths_total; /* used for message length profiling */ + u32 msg_length_profile[7]; /* used for msg. length profiling */ } stats; }; -- cgit v1.1 From f23d9bf2b7ba22fe49b65d344b3651049cecc51d Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 18 Jan 2011 15:15:34 -0500 Subject: tipc: Set unused probe field of link protocol messages to defined value Ensures that a link reset or activate message has a "probe" field of zero. (This field is currently unused in these messages, but this could potentially change in future versions of TIPC.) 
Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/link.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index d1818fb..754e310 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1946,6 +1946,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg, msg_set_ack(msg, mod(l_ptr->reset_checkpoint - 1)); msg_set_seq_gap(msg, 0); msg_set_next_sent(msg, 1); + msg_set_probe(msg, 0); msg_set_link_tolerance(msg, l_ptr->tolerance); msg_set_linkprio(msg, l_ptr->priority); msg_set_max_pkt(msg, l_ptr->max_pkt_target); -- cgit v1.1 From 69218fc426569739d2bb68e15ac4905948409642 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Fri, 21 Jan 2011 09:45:33 -0500 Subject: tipc: Minor optimization to topology service connection establishment Eliminates a local iovec structure containing no data, which was previously used during the establishment of a topology service connection, since the same effect can be achieved by passing in a NULL pointer and an iovec length of zero. 
Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/subscr.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 1387372..aae9eae 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -472,8 +472,6 @@ static void subscr_named_msg_event(void *usr_handle, struct tipc_portid const *orig, struct tipc_name_seq const *dest) { - static struct iovec msg_sect = {NULL, 0}; - struct subscriber *subscriber; u32 server_port_ref; @@ -523,7 +521,7 @@ static void subscr_named_msg_event(void *usr_handle, /* Send an ACK- to complete connection handshaking */ - tipc_send(server_port_ref, 1, &msg_sect); + tipc_send(server_port_ref, 0, NULL); /* Handle optional subscription request */ -- cgit v1.1 From cb7ce91448c01724e18c1759aa5aba86e5f1c69b Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Mon, 24 Jan 2011 15:02:14 -0500 Subject: tipc: Fix port counter handling to correct congestion control Modifies TIPC's congestion control between a connected port and its peer so that it works as documented. The following changes have been made: 1) The counter of the number of messages sent by a port now starts at zero, rather than one. This prevents the port from reporting port congestion one message earlier than it was supposed to. 2) The counter of the number of messages sent by a port is now incremented only if a non-empty message is sent successfully. This prevents the port from becoming permanently congested if too many send attempts are unsuccessful because of congestion (or other reasons). It also removes the risk that empty handshaking messages used during connection setup might cause the port to report congestion earlier than it was supposed to. 3) The counter of the number of unacknowledged messages received by a port controlled by an internal TIPC service is now incremented only if the message is non-empty, in order to be consistent with the aforementioned changes. 
Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/port.c | 53 ++++++++++++++++++++++++++++++++++------------------- net/tipc/port.h | 4 ++-- 2 files changed, 36 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/tipc/port.c b/net/tipc/port.c index aff5dc0..3e5122c 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -234,7 +234,6 @@ struct tipc_port *tipc_createport_raw(void *usr_handle, tipc_msg_init(msg, importance, TIPC_NAMED_MSG, LONG_H_SIZE, 0); msg_set_origport(msg, ref); p_ptr->last_in_seqno = 41; - p_ptr->sent = 1; INIT_LIST_HEAD(&p_ptr->wait_list); INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list); p_ptr->dispatcher = dispatcher; @@ -732,6 +731,7 @@ static void port_dispatcher_sigh(void *dummy) tipc_conn_msg_event cb = up_ptr->conn_msg_cb; u32 peer_port = port_peerport(p_ptr); u32 peer_node = port_peernode(p_ptr); + u32 dsz; tipc_port_unlock(p_ptr); if (unlikely(!cb)) @@ -742,13 +742,14 @@ static void port_dispatcher_sigh(void *dummy) } else if ((msg_origport(msg) != peer_port) || (msg_orignode(msg) != peer_node)) goto reject; - if (unlikely(++p_ptr->conn_unacked >= - TIPC_FLOW_CONTROL_WIN)) + dsz = msg_data_sz(msg); + if (unlikely(dsz && + (++p_ptr->conn_unacked >= + TIPC_FLOW_CONTROL_WIN))) tipc_acknowledge(dref, p_ptr->conn_unacked); skb_pull(buf, msg_hdr_sz(msg)); - cb(usr_handle, dref, &buf, msg_data(msg), - msg_data_sz(msg)); + cb(usr_handle, dref, &buf, msg_data(msg), dsz); break; } case TIPC_DIRECT_MSG:{ @@ -1221,7 +1222,8 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect) if (likely(res != -ELINKCONG)) { port_incr_out_seqno(p_ptr); p_ptr->congested = 0; - p_ptr->sent++; + if (res > 0) + p_ptr->sent++; return res; } } @@ -1263,13 +1265,17 @@ int tipc_send2name(u32 ref, struct tipc_name const *name, unsigned int domain, msg_set_destport(msg, destport); if (likely(destport)) { - p_ptr->sent++; if (likely(destnode == tipc_own_addr)) - return tipc_port_recv_sections(p_ptr, num_sect, 
msg_sect); - res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect, - destnode); - if (likely(res != -ELINKCONG)) + res = tipc_port_recv_sections(p_ptr, num_sect, + msg_sect); + else + res = tipc_link_send_sections_fast(p_ptr, msg_sect, + num_sect, destnode); + if (likely(res != -ELINKCONG)) { + if (res > 0) + p_ptr->sent++; return res; + } if (port_unreliable(p_ptr)) { /* Just calculate msg length and return */ return tipc_msg_calc_data_size(msg_sect, num_sect); @@ -1302,12 +1308,17 @@ int tipc_send2port(u32 ref, struct tipc_portid const *dest, msg_set_destnode(msg, dest->node); msg_set_destport(msg, dest->ref); msg_set_hdr_sz(msg, DIR_MSG_H_SIZE); - p_ptr->sent++; + if (dest->node == tipc_own_addr) - return tipc_port_recv_sections(p_ptr, num_sect, msg_sect); - res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect, dest->node); - if (likely(res != -ELINKCONG)) + res = tipc_port_recv_sections(p_ptr, num_sect, msg_sect); + else + res = tipc_link_send_sections_fast(p_ptr, msg_sect, num_sect, + dest->node); + if (likely(res != -ELINKCONG)) { + if (res > 0) + p_ptr->sent++; return res; + } if (port_unreliable(p_ptr)) { /* Just calculate msg length and return */ return tipc_msg_calc_data_size(msg_sect, num_sect); @@ -1343,12 +1354,16 @@ int tipc_send_buf2port(u32 ref, struct tipc_portid const *dest, skb_push(buf, DIR_MSG_H_SIZE); skb_copy_to_linear_data(buf, msg, DIR_MSG_H_SIZE); - p_ptr->sent++; + if (dest->node == tipc_own_addr) - return tipc_port_recv_msg(buf); - res = tipc_send_buf_fast(buf, dest->node); - if (likely(res != -ELINKCONG)) + res = tipc_port_recv_msg(buf); + else + res = tipc_send_buf_fast(buf, dest->node); + if (likely(res != -ELINKCONG)) { + if (res > 0) + p_ptr->sent++; return res; + } if (port_unreliable(p_ptr)) return dsz; return -ELINKCONG; diff --git a/net/tipc/port.h b/net/tipc/port.h index f8722af..34ccb7c 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -113,8 +113,8 @@ struct user_port { * @user_port: ptr to user port 
associated with port (if any) * @wait_list: adjacent ports in list of ports waiting on link congestion * @waiting_pkts: - * @sent: - * @acked: + * @sent: # of non-empty messages sent by port + * @acked: # of non-empty message acknowledgements from connected port's peer * @publications: list of publications for port * @pub_count: total # of publications port has made during its lifetime * @probing_state: -- cgit v1.1 From 214dda4a36329fdd631e3aac0fee6e6fa369db62 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Mon, 24 Jan 2011 16:22:43 -0500 Subject: tipc: Add in missing lock during link initialization Ensure that the routine that starts up processing on a newly created link endpoint takes the spinlock of the node object that owns the link, to prevent possible conflicts with processing involving other links owned by that node object. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/link.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index 754e310..89fbb6d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -388,7 +388,9 @@ void tipc_link_delete(struct link *l_ptr) static void link_start(struct link *l_ptr) { + tipc_node_lock(l_ptr->owner); link_state_event(l_ptr, STARTING_EVT); + tipc_node_unlock(l_ptr->owner); } /** -- cgit v1.1 From 741de3e9ff6e07e908e1cad2eb03e29677fde093 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 25 Jan 2011 13:33:31 -0500 Subject: tipc: Remove support for per-connection message sequence numbering Eliminates TIPC's prototype support for message sequence numbering on routable connections (i.e. connections requiring more than one hop). This capability isn't currently used, and can be removed since TIPC only supports systems in which all inter-node communication can be achieved in a single hop. 
Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/msg.c | 6 +----- net/tipc/msg.h | 12 +----------- net/tipc/port.c | 29 +---------------------------- net/tipc/port.h | 2 -- 4 files changed, 3 insertions(+), 46 deletions(-) (limited to 'net') diff --git a/net/tipc/msg.c b/net/tipc/msg.c index bb6180c..e56b9b8 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -2,7 +2,7 @@ * net/tipc/msg.c: TIPC message header routines * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -381,14 +381,10 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) tipc_printf(buf, ":OPRT(%u):", msg_origport(msg)); tipc_printf(buf, ":DPRT(%u):", msg_destport(msg)); } - if (msg_routed(msg) && !msg_non_seq(msg)) - tipc_printf(buf, ":TSEQN(%u)", msg_transp_seqno(msg)); } if (msg_user(msg) == NAME_DISTRIBUTOR) { tipc_printf(buf, ":ONOD(%x):", msg_orignode(msg)); tipc_printf(buf, ":DNOD(%x):", msg_destnode(msg)); - if (msg_routed(msg)) - tipc_printf(buf, ":CSEQN(%u)", msg_transp_seqno(msg)); } if (msg_user(msg) == LINK_CONFIG) { diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 92c4c4f..b1438c7 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -2,7 +2,7 @@ * net/tipc/msg.h: Include file for TIPC message header routines * * Copyright (c) 2000-2007, Ericsson AB - * Copyright (c) 2005-2008, Wind River Systems + * Copyright (c) 2005-2008, 2010-2011, Wind River Systems * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -438,11 +438,6 @@ static inline void msg_set_nametype(struct tipc_msg *m, u32 n) msg_set_word(m, 8, n); } -static inline u32 msg_transp_seqno(struct tipc_msg *m) -{ - return msg_word(m, 8); -} - static inline void msg_set_timestamp(struct tipc_msg *m, u32 n) { msg_set_word(m, 8, n); @@ -453,11 +448,6 @@ static inline u32 msg_timestamp(struct tipc_msg *m) return msg_word(m, 8); } -static inline void msg_set_transp_seqno(struct tipc_msg *m, u32 n) -{ - msg_set_word(m, 8, n); -} - static inline u32 msg_nameinst(struct tipc_msg *m) { return msg_word(m, 9); diff --git a/net/tipc/port.c b/net/tipc/port.c index 3e5122c..6ff78f9 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -69,20 +69,6 @@ static u32 port_peerport(struct tipc_port *p_ptr) return msg_destport(&p_ptr->phdr); } -static u32 port_out_seqno(struct tipc_port *p_ptr) -{ - return msg_transp_seqno(&p_ptr->phdr); -} - -static void port_incr_out_seqno(struct tipc_port *p_ptr) -{ - struct tipc_msg *m = &p_ptr->phdr; - - if (likely(!msg_routed(m))) - return; - msg_set_transp_seqno(m, (msg_transp_seqno(m) + 1)); -} - /** * tipc_multicast - send a multicast message to local and remote destinations */ @@ -233,7 +219,6 @@ struct tipc_port *tipc_createport_raw(void *usr_handle, msg = &p_ptr->phdr; tipc_msg_init(msg, importance, TIPC_NAMED_MSG, LONG_H_SIZE, 0); msg_set_origport(msg, ref); - p_ptr->last_in_seqno = 41; INIT_LIST_HEAD(&p_ptr->wait_list); INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list); p_ptr->dispatcher = dispatcher; @@ -344,7 +329,7 @@ int tipc_set_portunreturnable(u32 ref, unsigned int isunrejectable) static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode, u32 origport, u32 orignode, u32 usr, u32 type, u32 err, - u32 seqno, u32 ack) + u32 ack) { struct sk_buff *buf; struct tipc_msg *msg; @@ -357,7 +342,6 @@ static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode, msg_set_destport(msg, destport); 
msg_set_origport(msg, origport); msg_set_orignode(msg, orignode); - msg_set_transp_seqno(msg, seqno); msg_set_msgcnt(msg, ack); } return buf; @@ -467,9 +451,7 @@ static void port_timeout(unsigned long ref) CONN_MANAGER, CONN_PROBE, TIPC_OK, - port_out_seqno(p_ptr), 0); - port_incr_out_seqno(p_ptr); p_ptr->probing_state = PROBING; k_start_timer(&p_ptr->timer, p_ptr->probing_interval); } @@ -506,7 +488,6 @@ static struct sk_buff *port_build_self_abort_msg(struct tipc_port *p_ptr, u32 er imp, TIPC_CONN_MSG, err, - p_ptr->last_in_seqno + 1, 0); } @@ -526,7 +507,6 @@ static struct sk_buff *port_build_peer_abort_msg(struct tipc_port *p_ptr, u32 er imp, TIPC_CONN_MSG, err, - port_out_seqno(p_ptr), 0); } @@ -568,7 +548,6 @@ void tipc_port_recv_proto_msg(struct sk_buff *buf) TIPC_HIGH_IMPORTANCE, TIPC_CONN_MSG, err, - 0, 0); goto exit; } @@ -582,11 +561,9 @@ void tipc_port_recv_proto_msg(struct sk_buff *buf) CONN_MANAGER, CONN_PROBE_REPLY, TIPC_OK, - port_out_seqno(p_ptr), 0); } p_ptr->probing_state = CONFIRMED; - port_incr_out_seqno(p_ptr); exit: if (p_ptr) tipc_port_unlock(p_ptr); @@ -914,7 +891,6 @@ void tipc_acknowledge(u32 ref, u32 ack) CONN_MANAGER, CONN_ACK, TIPC_OK, - port_out_seqno(p_ptr), ack); } tipc_port_unlock(p_ptr); @@ -1088,7 +1064,6 @@ int tipc_connect2port(u32 ref, struct tipc_portid const *peer) msg_set_destport(msg, peer->ref); msg_set_orignode(msg, tipc_own_addr); msg_set_origport(msg, p_ptr->ref); - msg_set_transp_seqno(msg, 42); msg_set_type(msg, TIPC_CONN_MSG); msg_set_hdr_sz(msg, SHORT_H_SIZE); @@ -1170,7 +1145,6 @@ int tipc_shutdown(u32 ref) imp, TIPC_CONN_MSG, TIPC_CONN_SHUTDOWN, - port_out_seqno(p_ptr), 0); } tipc_port_unlock(p_ptr); @@ -1220,7 +1194,6 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect) res = tipc_port_recv_sections(p_ptr, num_sect, msg_sect); if (likely(res != -ELINKCONG)) { - port_incr_out_seqno(p_ptr); p_ptr->congested = 0; if (res > 0) p_ptr->sent++; diff --git a/net/tipc/port.h b/net/tipc/port.h 
index 34ccb7c..87b9424 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -119,7 +119,6 @@ struct user_port { * @pub_count: total # of publications port has made during its lifetime * @probing_state: * @probing_interval: - * @last_in_seqno: * @timer_ref: * @subscription: "node down" subscription used to terminate failed connections */ @@ -147,7 +146,6 @@ struct tipc_port { u32 pub_count; u32 probing_state; u32 probing_interval; - u32 last_in_seqno; struct timer_list timer; struct tipc_node_subscr subscription; }; -- cgit v1.1 From 2e07dda1659095115e5e36a2fed0fddc1e3ea1c8 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 25 Jan 2011 14:39:59 -0500 Subject: tipc: Remove unused message header field for requested number of links Eliminates support for the "number of requested links" field in a neighbor discovery message. This field was never used and has been removed from the TIPC 2.0 protocol specification. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/bearer.c | 2 +- net/tipc/discover.c | 11 +++-------- net/tipc/discover.h | 3 +-- net/tipc/msg.c | 1 - net/tipc/msg.h | 10 ---------- 5 files changed, 5 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 9e2ff0e..f2839b0 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -566,7 +566,7 @@ restart: INIT_LIST_HEAD(&b_ptr->links); if (m_ptr->bcast) { b_ptr->link_req = tipc_disc_init_link_req(b_ptr, &m_ptr->bcast_addr, - bcast_scope, 2); + bcast_scope); } spin_lock_init(&b_ptr->lock); write_unlock_bh(&tipc_net_lock); diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 59a86fc..09ce231 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -67,13 +67,11 @@ struct link_req { /** * tipc_disc_init_msg - initialize a link setup message * @type: message type (request or response) - * @req_links: number of links associated with message * @dest_domain: network domain of node(s) which should respond to message * @b_ptr: 
ptr to bearer issuing message */ static struct sk_buff *tipc_disc_init_msg(u32 type, - u32 req_links, u32 dest_domain, struct tipc_bearer *b_ptr) { @@ -84,7 +82,6 @@ static struct sk_buff *tipc_disc_init_msg(u32 type, msg = buf_msg(buf); tipc_msg_init(msg, LINK_CONFIG, type, DSC_H_SIZE, dest_domain); msg_set_non_seq(msg, 1); - msg_set_req_links(msg, req_links); msg_set_dest_domain(msg, dest_domain); msg_set_bc_netid(msg, tipc_net_id); msg_set_media_addr(msg, &b_ptr->addr); @@ -191,7 +188,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr) spin_unlock_bh(&n_ptr->lock); if ((type == DSC_RESP_MSG) || link_fully_up) return; - rbuf = tipc_disc_init_msg(DSC_RESP_MSG, 1, orig, b_ptr); + rbuf = tipc_disc_init_msg(DSC_RESP_MSG, orig, b_ptr); if (rbuf != NULL) { b_ptr->media->send_msg(rbuf, b_ptr, &media_addr); buf_discard(rbuf); @@ -274,15 +271,13 @@ static void disc_timeout(struct link_req *req) * @b_ptr: ptr to bearer issuing requests * @dest: destination address for request messages * @dest_domain: network domain of node(s) which should respond to message - * @req_links: max number of desired links * * Returns pointer to link request structure, or NULL if unable to create. 
*/ struct link_req *tipc_disc_init_link_req(struct tipc_bearer *b_ptr, const struct tipc_media_addr *dest, - u32 dest_domain, - u32 req_links) + u32 dest_domain) { struct link_req *req; @@ -290,7 +285,7 @@ struct link_req *tipc_disc_init_link_req(struct tipc_bearer *b_ptr, if (!req) return NULL; - req->buf = tipc_disc_init_msg(DSC_REQ_MSG, req_links, dest_domain, b_ptr); + req->buf = tipc_disc_init_msg(DSC_REQ_MSG, dest_domain, b_ptr); if (!req->buf) { kfree(req); return NULL; diff --git a/net/tipc/discover.h b/net/tipc/discover.h index 4046d77..e48a167 100644 --- a/net/tipc/discover.h +++ b/net/tipc/discover.h @@ -41,8 +41,7 @@ struct link_req; struct link_req *tipc_disc_init_link_req(struct tipc_bearer *b_ptr, const struct tipc_media_addr *dest, - u32 dest_domain, - u32 req_links); + u32 dest_domain); void tipc_disc_update_link_req(struct link_req *req); void tipc_disc_stop_link_req(struct link_req *req); diff --git a/net/tipc/msg.c b/net/tipc/msg.c index e56b9b8..0787e12 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -390,7 +390,6 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) if (msg_user(msg) == LINK_CONFIG) { u32 *raw = (u32 *)msg; struct tipc_media_addr *orig = (struct tipc_media_addr *)&raw[5]; - tipc_printf(buf, ":REQL(%u):", msg_req_links(msg)); tipc_printf(buf, ":DDOM(%x):", msg_dest_domain(msg)); tipc_printf(buf, ":NETID(%u):", msg_bc_netid(msg)); tipc_media_addr_printf(buf, orig); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index b1438c7..9d643a1 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -567,16 +567,6 @@ static inline void msg_set_seq_gap(struct tipc_msg *m, u32 n) msg_set_bits(m, 1, 16, 0x1fff, n); } -static inline u32 msg_req_links(struct tipc_msg *m) -{ - return msg_bits(m, 1, 16, 0xfff); -} - -static inline void msg_set_req_links(struct tipc_msg *m, u32 n) -{ - msg_set_bits(m, 1, 16, 0xfff, n); -} - /* * Word 2 -- cgit v1.1 From bf781ecfc6d6ecc4f66762a870f9c1fc76b9c8d5 Mon Sep 17 00:00:00 2001 
From: Allan Stephens Date: Tue, 25 Jan 2011 16:12:39 -0500 Subject: tipc: Avoid reliable broadcast preparation for NACK messages Enhance TIPC to skip unnecessary (and, in some cases, redundant) preparation work when sending a broadcast link NACK message, since this preparation is only required for broadcast messages that are sent in a reliable manner. This change also fixes a bug that caused NACK messages to be improperly counted as "TX packets" in TIPC's broadcast link statistics. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/bcast.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 63df42b..7dc1dc7 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -298,6 +298,7 @@ static void bclink_send_nack(struct tipc_node *n_ptr) msg = buf_msg(buf); tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, n_ptr->addr); + msg_set_non_seq(msg, 1); msg_set_mc_netid(msg, tipc_net_id); msg_set_bcast_ack(msg, mod(n_ptr->bclink.last_in)); msg_set_bcgap_after(msg, n_ptr->bclink.gap_after); -- cgit v1.1 From 17a8f8e3734920cf2f030f2fa521a0b940ef6f90 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Thu, 24 Feb 2011 08:19:57 +0800 Subject: ipvs: use enum to instead of magic numbers Signed-off-by: Changli Gao Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_xmit.c | 41 +++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 1f2a4e3..a48239a 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -43,6 +43,13 @@ #include +enum { + IP_VS_RT_MODE_LOCAL = 1, /* Allow local dest */ + IP_VS_RT_MODE_NON_LOCAL = 2, /* Allow non-local dest */ + IP_VS_RT_MODE_RDR = 4, /* Allow redirect from remote daddr to + * local + */ +}; /* * Destination cache to speed up outgoing route lookup @@ -77,11 +84,7 @@ __ip_vs_dst_check(struct 
ip_vs_dest *dest, u32 rtos) return dst; } -/* - * Get route to destination or remote server - * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest, - * &4=Allow redirect from remote daddr to local - */ +/* Get route to destination or remote server */ static struct rtable * __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, __be32 daddr, u32 rtos, int rt_mode) @@ -126,15 +129,16 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, } local = rt->rt_flags & RTCF_LOCAL; - if (!((local ? 1 : 2) & rt_mode)) { + if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) & + rt_mode)) { IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n", (rt->rt_flags & RTCF_LOCAL) ? "local":"non-local", &rt->rt_dst); ip_rt_put(rt); return NULL; } - if (local && !(rt_mode & 4) && !((ort = skb_rtable(skb)) && - ort->rt_flags & RTCF_LOCAL)) { + if (local && !(rt_mode & IP_VS_RT_MODE_RDR) && + !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) { IP_VS_DBG_RL("Redirect from non-local address %pI4 to local " "requires NAT method, dest: %pI4\n", &ip_hdr(skb)->daddr, &rt->rt_dst); @@ -383,8 +387,8 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, - RT_TOS(iph->tos), 2))) + if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos), + IP_VS_RT_MODE_NON_LOCAL))) goto tx_error_icmp; /* MTU checking */ @@ -512,7 +516,10 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, } if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, - RT_TOS(iph->tos), 1|2|4))) + RT_TOS(iph->tos), + IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_RDR))) goto tx_error_icmp; local = rt->rt_flags & RTCF_LOCAL; /* @@ -755,7 +762,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, - RT_TOS(tos), 1|2))) + RT_TOS(tos), IP_VS_RT_MODE_LOCAL | + 
IP_VS_RT_MODE_NON_LOCAL))) goto tx_error_icmp; if (rt->rt_flags & RTCF_LOCAL) { ip_rt_put(rt); @@ -984,7 +992,9 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, - RT_TOS(iph->tos), 1|2))) + RT_TOS(iph->tos), + IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL))) goto tx_error_icmp; if (rt->rt_flags & RTCF_LOCAL) { ip_rt_put(rt); @@ -1128,7 +1138,10 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, */ if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, - RT_TOS(ip_hdr(skb)->tos), 1|2|4))) + RT_TOS(ip_hdr(skb)->tos), + IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_RDR))) goto tx_error_icmp; local = rt->rt_flags & RTCF_LOCAL; -- cgit v1.1 From 214e005bc32c7045b8554f9f0fb07b3fcce2cd42 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Feb 2011 00:02:38 -0500 Subject: xfrm: Pass km_event pointers around as const when possible. Signed-off-by: David S. Miller --- net/key/af_key.c | 16 ++++++++-------- net/xfrm/xfrm_state.c | 4 ++-- net/xfrm/xfrm_user.c | 24 ++++++++++++------------ 3 files changed, 22 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 60fd2f1..7c5e101 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1429,7 +1429,7 @@ static inline int event2keytype(int event) } /* ADD/UPD/DEL */ -static int key_notify_sa(struct xfrm_state *x, struct km_event *c) +static int key_notify_sa(struct xfrm_state *x, const struct km_event *c) { struct sk_buff *skb; struct sadb_msg *hdr; @@ -1688,7 +1688,7 @@ static int unicast_flush_resp(struct sock *sk, struct sadb_msg *ihdr) return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk)); } -static int key_notify_sa_flush(struct km_event *c) +static int key_notify_sa_flush(const struct km_event *c) { struct sk_buff *skb; struct sadb_msg *hdr; @@ -2123,7 +2123,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, 
struct xfrm_policy *xp, in return 0; } -static int key_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c) +static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c) { struct sk_buff *out_skb; struct sadb_msg *out_hdr; @@ -2660,7 +2660,7 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg * return pfkey_do_dump(pfk); } -static int key_notify_policy_flush(struct km_event *c) +static int key_notify_policy_flush(const struct km_event *c) { struct sk_buff *skb_out; struct sadb_msg *hdr; @@ -2914,12 +2914,12 @@ static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t) } } -static int key_notify_policy_expire(struct xfrm_policy *xp, struct km_event *c) +static int key_notify_policy_expire(struct xfrm_policy *xp, const struct km_event *c) { return 0; } -static int key_notify_sa_expire(struct xfrm_state *x, struct km_event *c) +static int key_notify_sa_expire(struct xfrm_state *x, const struct km_event *c) { struct sk_buff *out_skb; struct sadb_msg *out_hdr; @@ -2949,7 +2949,7 @@ static int key_notify_sa_expire(struct xfrm_state *x, struct km_event *c) return 0; } -static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c) +static int pfkey_send_notify(struct xfrm_state *x, const struct km_event *c) { struct net *net = x ? 
xs_net(x) : c->net; struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); @@ -2976,7 +2976,7 @@ static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c) return 0; } -static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) +static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) { if (xp && xp->type != XFRM_POLICY_TYPE_MAIN) return 0; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 30a0f17..7028f06 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1727,7 +1727,7 @@ void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) static LIST_HEAD(xfrm_km_list); static DEFINE_RWLOCK(xfrm_km_lock); -void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) +void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) { struct xfrm_mgr *km; @@ -1738,7 +1738,7 @@ void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) read_unlock(&xfrm_km_lock); } -void km_state_notify(struct xfrm_state *x, struct km_event *c) +void km_state_notify(struct xfrm_state *x, const struct km_event *c) { struct xfrm_mgr *km; read_lock(&xfrm_km_lock); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 6129196..2cc9dab 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1582,7 +1582,7 @@ static inline size_t xfrm_aevent_msgsize(void) + nla_total_size(4); /* XFRM_AE_ETHR */ } -static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_event *c) +static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c) { struct xfrm_aevent_id *id; struct nlmsghdr *nlh; @@ -2220,7 +2220,7 @@ static inline size_t xfrm_expire_msgsize(void) + nla_total_size(sizeof(struct xfrm_mark)); } -static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_event *c) +static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct km_event 
*c) { struct xfrm_user_expire *ue; struct nlmsghdr *nlh; @@ -2242,7 +2242,7 @@ nla_put_failure: return -EMSGSIZE; } -static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c) +static int xfrm_exp_state_notify(struct xfrm_state *x, const struct km_event *c) { struct net *net = xs_net(x); struct sk_buff *skb; @@ -2259,7 +2259,7 @@ static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c) return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); } -static int xfrm_aevent_state_notify(struct xfrm_state *x, struct km_event *c) +static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event *c) { struct net *net = xs_net(x); struct sk_buff *skb; @@ -2274,7 +2274,7 @@ static int xfrm_aevent_state_notify(struct xfrm_state *x, struct km_event *c) return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC); } -static int xfrm_notify_sa_flush(struct km_event *c) +static int xfrm_notify_sa_flush(const struct km_event *c) { struct net *net = c->net; struct xfrm_usersa_flush *p; @@ -2330,7 +2330,7 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x) return l; } -static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c) +static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c) { struct net *net = xs_net(x); struct xfrm_usersa_info *p; @@ -2387,7 +2387,7 @@ nla_put_failure: return -1; } -static int xfrm_send_state_notify(struct xfrm_state *x, struct km_event *c) +static int xfrm_send_state_notify(struct xfrm_state *x, const struct km_event *c) { switch (c->event) { @@ -2546,7 +2546,7 @@ static inline size_t xfrm_polexpire_msgsize(struct xfrm_policy *xp) } static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, - int dir, struct km_event *c) + int dir, const struct km_event *c) { struct xfrm_user_polexpire *upe; struct nlmsghdr *nlh; @@ -2576,7 +2576,7 @@ nlmsg_failure: return -EMSGSIZE; } -static int xfrm_exp_policy_notify(struct xfrm_policy *xp, 
int dir, struct km_event *c) +static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) { struct net *net = xp_net(xp); struct sk_buff *skb; @@ -2591,7 +2591,7 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); } -static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c) +static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c) { struct net *net = xp_net(xp); struct xfrm_userpolicy_info *p; @@ -2656,7 +2656,7 @@ nlmsg_failure: return -1; } -static int xfrm_notify_policy_flush(struct km_event *c) +static int xfrm_notify_policy_flush(const struct km_event *c) { struct net *net = c->net; struct nlmsghdr *nlh; @@ -2681,7 +2681,7 @@ nlmsg_failure: return -1; } -static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) +static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) { switch (c->event) { -- cgit v1.1 From 19bd62441c36279ab33e311faebd357ef04ba344 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Feb 2011 00:07:20 -0500 Subject: xfrm: Const'ify tmpl and address arguments to ->init_temprop() Signed-off-by: David S. 
Miller --- net/ipv4/xfrm4_state.c | 4 ++-- net/ipv6/xfrm6_state.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 19eb560..983eff2 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -37,8 +37,8 @@ __xfrm4_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl) } static void -xfrm4_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl, - xfrm_address_t *daddr, xfrm_address_t *saddr) +xfrm4_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl, + const xfrm_address_t *daddr, const xfrm_address_t *saddr) { x->id = tmpl->id; if (x->id.daddr.a4 == 0) diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 68a14c0..a02598e 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -38,8 +38,8 @@ __xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl) } static void -xfrm6_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl, - xfrm_address_t *daddr, xfrm_address_t *saddr) +xfrm6_init_temprop(struct xfrm_state *x, const struct xfrm_tmpl *tmpl, + const xfrm_address_t *daddr, const xfrm_address_t *saddr) { x->id = tmpl->id; if (ipv6_addr_any((struct in6_addr*)&x->id.daddr)) -- cgit v1.1 From 200ce96e5601391a6d97c87067edf21fa94fb74e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Feb 2011 00:12:25 -0500 Subject: xfrm: Const'ify selector argument to xfrm_selector_match() Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_policy.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 28c865a..4827c8d 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -58,7 +58,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir); static inline int -__xfrm4_selector_match(struct xfrm_selector *sel, const struct flowi *fl) +__xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) { return addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) && addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) && @@ -69,7 +69,7 @@ __xfrm4_selector_match(struct xfrm_selector *sel, const struct flowi *fl) } static inline int -__xfrm6_selector_match(struct xfrm_selector *sel, const struct flowi *fl) +__xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) { return addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) && addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) && @@ -79,7 +79,7 @@ __xfrm6_selector_match(struct xfrm_selector *sel, const struct flowi *fl) (fl->oif == sel->ifindex || !sel->ifindex); } -int xfrm_selector_match(struct xfrm_selector *sel, const struct flowi *fl, +int xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl, unsigned short family) { switch (family) { -- cgit v1.1 From 5e6b930f21b0a442f9d5db97c8314b4d91be1c27 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Feb 2011 00:14:45 -0500 Subject: xfrm: Const'ify address arguments to ->dst_lookup() Signed-off-by: David S. 
Miller --- net/ipv4/xfrm4_policy.c | 4 ++-- net/ipv6/xfrm6_policy.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 1e9844d..63aa88e 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -19,8 +19,8 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo; static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, - xfrm_address_t *saddr, - xfrm_address_t *daddr) + const xfrm_address_t *saddr, + const xfrm_address_t *daddr) { struct flowi fl = { .fl4_dst = daddr->a4, diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index f2fa904..c128ca1 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -27,8 +27,8 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo; static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, - xfrm_address_t *saddr, - xfrm_address_t *daddr) + const xfrm_address_t *saddr, + const xfrm_address_t *daddr) { struct flowi fl = {}; struct dst_entry *dst; -- cgit v1.1 From 6418c4e07991a7b405d86bd4579c670b50fec99d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Feb 2011 00:16:53 -0500 Subject: xfrm: Const'ify address arguments to __xfrm_dst_lookup() Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 4827c8d..5f19ae6 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -92,8 +92,8 @@ int xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl, } static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, - xfrm_address_t *saddr, - xfrm_address_t *daddr, + const xfrm_address_t *saddr, + const xfrm_address_t *daddr, int family) { struct xfrm_policy_afinfo *afinfo; -- cgit v1.1 From dd701754e7d230330adc0e212b94106bbfd34841 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Thu, 24 Feb 2011 00:21:08 -0500 Subject: xfrm: Const'ify pointer args to migrate_tmpl_match and xfrm_migrate_check Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 5f19ae6..eb76da7 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2788,7 +2788,7 @@ static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel, return ret; } -static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t) +static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t) { int match = 0; @@ -2858,7 +2858,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, return 0; } -static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate) +static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate) { int i, j; -- cgit v1.1 From 183cad12785ffc036571c4b789dc084ec61a1bad Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Feb 2011 00:28:01 -0500 Subject: xfrm: Const'ify pointer args to km_migrate() and implementations. Signed-off-by: David S. 
Miller --- net/key/af_key.c | 22 +++++++++++----------- net/xfrm/xfrm_state.c | 6 +++--- net/xfrm/xfrm_user.c | 24 ++++++++++++------------ 3 files changed, 26 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 7c5e101..5637285 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -690,7 +690,7 @@ static inline int pfkey_mode_to_xfrm(int mode) } } -static unsigned int pfkey_sockaddr_fill(xfrm_address_t *xaddr, __be16 port, +static unsigned int pfkey_sockaddr_fill(const xfrm_address_t *xaddr, __be16 port, struct sockaddr *sa, unsigned short family) { @@ -3318,7 +3318,7 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, #ifdef CONFIG_NET_KEY_MIGRATE static int set_sadb_address(struct sk_buff *skb, int sasize, int type, - struct xfrm_selector *sel) + const struct xfrm_selector *sel) { struct sadb_address *addr; addr = (struct sadb_address *)skb_put(skb, sizeof(struct sadb_address) + sasize); @@ -3348,7 +3348,7 @@ static int set_sadb_address(struct sk_buff *skb, int sasize, int type, } -static int set_sadb_kmaddress(struct sk_buff *skb, struct xfrm_kmaddress *k) +static int set_sadb_kmaddress(struct sk_buff *skb, const struct xfrm_kmaddress *k) { struct sadb_x_kmaddress *kma; u8 *sa; @@ -3376,7 +3376,7 @@ static int set_sadb_kmaddress(struct sk_buff *skb, struct xfrm_kmaddress *k) static int set_ipsecrequest(struct sk_buff *skb, uint8_t proto, uint8_t mode, int level, uint32_t reqid, uint8_t family, - xfrm_address_t *src, xfrm_address_t *dst) + const xfrm_address_t *src, const xfrm_address_t *dst) { struct sadb_x_ipsecrequest *rq; u8 *sa; @@ -3404,9 +3404,9 @@ static int set_ipsecrequest(struct sk_buff *skb, #endif #ifdef CONFIG_NET_KEY_MIGRATE -static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_bundles, - struct xfrm_kmaddress *k) +static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, + const struct 
xfrm_migrate *m, int num_bundles, + const struct xfrm_kmaddress *k) { int i; int sasize_sel; @@ -3415,7 +3415,7 @@ static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, struct sk_buff *skb; struct sadb_msg *hdr; struct sadb_x_policy *pol; - struct xfrm_migrate *mp; + const struct xfrm_migrate *mp; if (type != XFRM_POLICY_TYPE_MAIN) return 0; @@ -3513,9 +3513,9 @@ err: return -EINVAL; } #else -static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_bundles, - struct xfrm_kmaddress *k) +static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, + const struct xfrm_migrate *m, int num_bundles, + const struct xfrm_kmaddress *k) { return -ENOPROTOOPT; } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 7028f06..555bedd 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1819,9 +1819,9 @@ void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid) EXPORT_SYMBOL(km_policy_expired); #ifdef CONFIG_XFRM_MIGRATE -int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate, - struct xfrm_kmaddress *k) +int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, + const struct xfrm_migrate *m, int num_migrate, + const struct xfrm_kmaddress *k) { int err = -EINVAL; int ret; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 2cc9dab..b43c1b1 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1986,7 +1986,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, #endif #ifdef CONFIG_XFRM_MIGRATE -static int copy_to_user_migrate(struct xfrm_migrate *m, struct sk_buff *skb) +static int copy_to_user_migrate(const struct xfrm_migrate *m, struct sk_buff *skb) { struct xfrm_user_migrate um; @@ -2004,7 +2004,7 @@ static int copy_to_user_migrate(struct xfrm_migrate *m, struct sk_buff *skb) return nla_put(skb, XFRMA_MIGRATE, sizeof(um), &um); } -static int 
copy_to_user_kmaddress(struct xfrm_kmaddress *k, struct sk_buff *skb) +static int copy_to_user_kmaddress(const struct xfrm_kmaddress *k, struct sk_buff *skb) { struct xfrm_user_kmaddress uk; @@ -2025,11 +2025,11 @@ static inline size_t xfrm_migrate_msgsize(int num_migrate, int with_kma) + userpolicy_type_attrsize(); } -static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m, - int num_migrate, struct xfrm_kmaddress *k, - struct xfrm_selector *sel, u8 dir, u8 type) +static int build_migrate(struct sk_buff *skb, const struct xfrm_migrate *m, + int num_migrate, const struct xfrm_kmaddress *k, + const struct xfrm_selector *sel, u8 dir, u8 type) { - struct xfrm_migrate *mp; + const struct xfrm_migrate *mp; struct xfrm_userpolicy_id *pol_id; struct nlmsghdr *nlh; int i; @@ -2061,9 +2061,9 @@ nlmsg_failure: return -EMSGSIZE; } -static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate, - struct xfrm_kmaddress *k) +static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, + const struct xfrm_migrate *m, int num_migrate, + const struct xfrm_kmaddress *k) { struct net *net = &init_net; struct sk_buff *skb; @@ -2079,9 +2079,9 @@ static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC); } #else -static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate, - struct xfrm_kmaddress *k) +static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, + const struct xfrm_migrate *m, int num_migrate, + const struct xfrm_kmaddress *k) { return -ENOPROTOOPT; } -- cgit v1.1 From 5f803b58cd8528a93fbb72fa7b011547e7b1a310 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Feb 2011 00:33:19 -0500 Subject: xfrm: Const'ify address args to hash helpers. Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_hash.h | 32 +++++++++++++++++++------------- net/xfrm/xfrm_policy.c | 9 +++++++-- 2 files changed, 26 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h index 8e69533..7199d78 100644 --- a/net/xfrm/xfrm_hash.h +++ b/net/xfrm/xfrm_hash.h @@ -4,29 +4,32 @@ #include #include -static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr) +static inline unsigned int __xfrm4_addr_hash(const xfrm_address_t *addr) { return ntohl(addr->a4); } -static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) +static inline unsigned int __xfrm6_addr_hash(const xfrm_address_t *addr) { return ntohl(addr->a6[2] ^ addr->a6[3]); } -static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) +static inline unsigned int __xfrm4_daddr_saddr_hash(const xfrm_address_t *daddr, + const xfrm_address_t *saddr) { u32 sum = (__force u32)daddr->a4 + (__force u32)saddr->a4; return ntohl((__force __be32)sum); } -static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) +static inline unsigned int __xfrm6_daddr_saddr_hash(const xfrm_address_t *daddr, + const xfrm_address_t *saddr) { return ntohl(daddr->a6[2] ^ daddr->a6[3] ^ saddr->a6[2] ^ saddr->a6[3]); } -static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, +static inline unsigned int __xfrm_dst_hash(const xfrm_address_t *daddr, + const xfrm_address_t *saddr, u32 reqid, unsigned short family, unsigned int hmask) { @@ -42,8 +45,8 @@ static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t return (h ^ (h >> 16)) & hmask; } -static inline unsigned __xfrm_src_hash(xfrm_address_t *daddr, - xfrm_address_t *saddr, +static inline unsigned __xfrm_src_hash(const xfrm_address_t *daddr, + const xfrm_address_t *saddr, unsigned short family, unsigned int hmask) { @@ -60,8 +63,8 @@ static inline unsigned __xfrm_src_hash(xfrm_address_t 
*daddr, } static inline unsigned int -__xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family, - unsigned int hmask) +__xfrm_spi_hash(const xfrm_address_t *daddr, __be32 spi, u8 proto, + unsigned short family, unsigned int hmask) { unsigned int h = (__force u32)spi ^ proto; switch (family) { @@ -80,10 +83,11 @@ static inline unsigned int __idx_hash(u32 index, unsigned int hmask) return (index ^ (index >> 8)) & hmask; } -static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short family, unsigned int hmask) +static inline unsigned int __sel_hash(const struct xfrm_selector *sel, + unsigned short family, unsigned int hmask) { - xfrm_address_t *daddr = &sel->daddr; - xfrm_address_t *saddr = &sel->saddr; + const xfrm_address_t *daddr = &sel->daddr; + const xfrm_address_t *saddr = &sel->saddr; unsigned int h = 0; switch (family) { @@ -107,7 +111,9 @@ static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short return h & hmask; } -static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, unsigned int hmask) +static inline unsigned int __addr_hash(const xfrm_address_t *daddr, + const xfrm_address_t *saddr, + unsigned short family, unsigned int hmask) { unsigned int h = 0; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index eb76da7..0770b3a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -311,7 +311,9 @@ static inline unsigned int idx_hash(struct net *net, u32 index) return __idx_hash(index, net->xfrm.policy_idx_hmask); } -static struct hlist_head *policy_hash_bysel(struct net *net, struct xfrm_selector *sel, unsigned short family, int dir) +static struct hlist_head *policy_hash_bysel(struct net *net, + const struct xfrm_selector *sel, + unsigned short family, int dir) { unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int hash = __sel_hash(sel, family, hmask); @@ -321,7 +323,10 @@ static struct hlist_head 
*policy_hash_bysel(struct net *net, struct xfrm_selecto net->xfrm.policy_bydst[dir].table + hash); } -static struct hlist_head *policy_hash_direct(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir) +static struct hlist_head *policy_hash_direct(struct net *net, + const xfrm_address_t *daddr, + const xfrm_address_t *saddr, + unsigned short family, int dir) { unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int hash = __addr_hash(daddr, saddr, family, hmask); -- cgit v1.1 From b4b7c0b389131c34b6c3a6bf3f3c4d17fe59155f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Feb 2011 00:35:06 -0500 Subject: xfrm: Const'ify selector args in xfrm_migrate paths. Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 0770b3a..0c503be 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2736,8 +2736,8 @@ EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete); #endif #ifdef CONFIG_XFRM_MIGRATE -static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp, - struct xfrm_selector *sel_tgt) +static int xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp, + const struct xfrm_selector *sel_tgt) { if (sel_cmp->proto == IPSEC_ULPROTO_ANY) { if (sel_tgt->family == sel_cmp->family && @@ -2757,7 +2757,7 @@ static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp, return 0; } -static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel, +static struct xfrm_policy * xfrm_migrate_policy_find(const struct xfrm_selector *sel, u8 dir, u8 type) { struct xfrm_policy *pol, *ret = NULL; @@ -2897,7 +2897,7 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate) return 0; } -int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, +int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, 
struct xfrm_migrate *m, int num_migrate, struct xfrm_kmaddress *k) { -- cgit v1.1 From 0b597e7edfd865cce7b18e71989a992ad0ca898e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Feb 2011 01:22:48 -0500 Subject: xfrm: Const'ify local xfrm_address_t pointers in xfrm_policy_lookup_bytype. Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 0c503be..d097668 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -894,7 +894,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, { int err; struct xfrm_policy *pol, *ret; - xfrm_address_t *daddr, *saddr; + const xfrm_address_t *daddr, *saddr; struct hlist_node *entry; struct hlist_head *chain; u32 priority = ~0U; -- cgit v1.1 From f299d557cb7fca4219020b19dab28ed26738c3ee Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Feb 2011 01:23:30 -0500 Subject: xfrm: Const'ify policy arg and local selector in xfrm_policy_match. Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d097668..9f6c7a7 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -869,10 +869,11 @@ EXPORT_SYMBOL(xfrm_policy_walk_done); * * Returns 0 if policy found, else an -errno. */ -static int xfrm_policy_match(struct xfrm_policy *pol, const struct flowi *fl, +static int xfrm_policy_match(const struct xfrm_policy *pol, + const struct flowi *fl, u8 type, u16 family, int dir) { - struct xfrm_selector *sel = &pol->selector; + const struct xfrm_selector *sel = &pol->selector; int match, ret = -ESRCH; if (pol->family != family || -- cgit v1.1 From d3e40a9f5ed53894bc0ba8cf010844f1028afe29 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Thu, 24 Feb 2011 01:25:41 -0500 Subject: xfrm: Const'ify policy arg to clone_policy. Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9f6c7a7..f1f90af 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1105,7 +1105,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) return 0; } -static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) +static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir) { struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC); -- cgit v1.1 From 1786b3891c5d72803e48b990ebad4ac1b6fd9700 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Feb 2011 01:32:54 -0500 Subject: xfrm: Const'ify selector arg to xfrm_dst_update_parent. Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f1f90af..3a4221a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1525,7 +1525,7 @@ xfrm_dst_alloc_copy(void **target, const void *src, int size) } static int inline -xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel) +xfrm_dst_update_parent(struct dst_entry *dst, const struct xfrm_selector *sel) { #ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_dst *xdst = (struct xfrm_dst *)dst; -- cgit v1.1 From 7db454b9125100877b6aa15009cf9a73c68ac755 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Feb 2011 01:43:01 -0500 Subject: xfrm: Const'ify ptr args to xfrm_state_ok. Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 3a4221a..f766e5f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1933,7 +1933,7 @@ xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl) */ static inline int -xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, +xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, unsigned short family) { if (xfrm_state_kern(x)) -- cgit v1.1 From 22cccb7e03125155624d0893b86a151155f1048e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Feb 2011 01:43:33 -0500 Subject: xfrm: Const'ify ptr args to xfrm_policy_ok. Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f766e5f..2de0bc2 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1956,7 +1956,7 @@ xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, * Otherwise "-2 - errored_index" is returned. */ static inline int -xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, +xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start, unsigned short family) { int idx = start; -- cgit v1.1 From 9a7386ec999ae226890faea2661b4c7d494bcbb8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Feb 2011 01:44:12 -0500 Subject: xfrm: Const'ify sec_path arg to secpath_has_nontransport. Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 2de0bc2..41a91d2 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1994,7 +1994,7 @@ int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, } EXPORT_SYMBOL(__xfrm_decode_session); -static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp) +static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp) { for (; k < sp->len; k++) { if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) { -- cgit v1.1 From 2ab38503d0dff932cb657d8ef6055f28910ac0ef Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Feb 2011 01:47:16 -0500 Subject: xfrm: Const'ify xfrm_address_t args to xfrm_*_hash. Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 555bedd..0383d83 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -50,8 +50,8 @@ static void xfrm_audit_state_replay(struct xfrm_state *x, #endif /* CONFIG_AUDITSYSCALL */ static inline unsigned int xfrm_dst_hash(struct net *net, - xfrm_address_t *daddr, - xfrm_address_t *saddr, + const xfrm_address_t *daddr, + const xfrm_address_t *saddr, u32 reqid, unsigned short family) { @@ -59,15 +59,16 @@ static inline unsigned int xfrm_dst_hash(struct net *net, } static inline unsigned int xfrm_src_hash(struct net *net, - xfrm_address_t *daddr, - xfrm_address_t *saddr, + const xfrm_address_t *daddr, + const xfrm_address_t *saddr, unsigned short family) { return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask); } static inline unsigned int -xfrm_spi_hash(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) +xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr, + __be32 spi, u8 
proto, unsigned short family) { return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask); } -- cgit v1.1 From 046860138e3f244d19e59c4fb1ef637803f3abbf Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Feb 2011 01:50:12 -0500 Subject: xfrm: Const'ify xfrm_tmpl arg to xfrm_init_tempstate. Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 0383d83..ac6c48a 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -658,7 +658,7 @@ EXPORT_SYMBOL(xfrm_sad_getinfo); static int xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl, - struct xfrm_tmpl *tmpl, + const struct xfrm_tmpl *tmpl, xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family) { -- cgit v1.1 From 9aa600889be2f6a6a5fed85a33d4530920662965 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Feb 2011 01:51:36 -0500 Subject: xfrm: Const'ify xfrm_address_t args to __xfrm_state_lookup{,_byaddr}. Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_state.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index ac6c48a..8a57a1e 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -678,7 +678,10 @@ xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl, return 0; } -static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) +static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, + const xfrm_address_t *daddr, + __be32 spi, u8 proto, + unsigned short family) { unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); struct xfrm_state *x; @@ -700,7 +703,10 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, xfrm_ad return NULL; } -static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) +static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, + const xfrm_address_t *daddr, + const xfrm_address_t *saddr, + u8 proto, unsigned short family) { unsigned int h = xfrm_src_hash(net, daddr, saddr, family); struct xfrm_state *x; -- cgit v1.1 From 1f673c5fe2eca9007e60d82186473aa94090ea4c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Feb 2011 01:53:13 -0500 Subject: xfrm: Remove unused 'saddr' and 'daddr' args to xfrm_state_look_at. Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_state.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 8a57a1e..9d9ac7c 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -754,7 +754,6 @@ static void xfrm_hash_grow_check(struct net *net, int have_hash_collision) static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x, const struct flowi *fl, unsigned short family, - xfrm_address_t *daddr, xfrm_address_t *saddr, struct xfrm_state **best, int *acq_in_progress, int *error) { @@ -820,7 +819,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) - xfrm_state_look_at(pol, x, fl, encap_family, daddr, saddr, + xfrm_state_look_at(pol, x, fl, encap_family, &best, &acquire_in_progress, &error); } if (best) @@ -836,7 +835,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) - xfrm_state_look_at(pol, x, fl, encap_family, daddr, saddr, + xfrm_state_look_at(pol, x, fl, encap_family, &best, &acquire_in_progress, &error); } -- cgit v1.1 From 33765d06033cc4ba4d9ae6d3d606ef3f28773c1b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Feb 2011 01:55:45 -0500 Subject: xfrm: Const'ify xfrm_address_t args to xfrm_state_find. This required a const'ification in xfrm_init_tempstate() too. Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 9d9ac7c..8496b3d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -659,7 +659,7 @@ EXPORT_SYMBOL(xfrm_sad_getinfo); static int xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl, const struct xfrm_tmpl *tmpl, - xfrm_address_t *daddr, xfrm_address_t *saddr, + const xfrm_address_t *daddr, const xfrm_address_t *saddr, unsigned short family) { struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); @@ -790,7 +790,7 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x, } struct xfrm_state * -xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, +xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, const struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm_policy *pol, int *err, unsigned short family) -- cgit v1.1 From dca8b089c95d94afa1d715df257de0286350e99d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Feb 2011 13:38:12 -0800 Subject: ipv4: Rearrange how ip_route_newports() gets port keys. ip_route_newports() is the only place in the entire kernel that cares about the port members in the routing cache entry's lookup flow key. Therefore the only reason we store an entire flow inside of the struct rtentry is for this one special case. Rewrite ip_route_newports() such that: 1) The caller passes in the original port values, so we don't need to use the rth->fl.fl_ip_{s,d}port values to remember them. 2) The lookup flow is constructed by hand instead of being copied from the routing cache entry's flow. Signed-off-by: David S. 
Miller --- net/dccp/ipv4.c | 10 +++++++--- net/ipv4/tcp_ipv4.c | 6 +++++- 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 45a434f..9379891 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -43,6 +43,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct inet_sock *inet = inet_sk(sk); struct dccp_sock *dp = dccp_sk(sk); const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; + __be16 orig_sport, orig_dport; struct rtable *rt; __be32 daddr, nexthop; int tmp; @@ -63,10 +64,12 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) nexthop = inet->opt->faddr; } + orig_sport = inet->inet_sport; + orig_dport = usin->sin_port; tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_DCCP, - inet->inet_sport, usin->sin_port, sk, 1); + orig_sport, orig_dport, sk, 1); if (tmp < 0) return tmp; @@ -99,8 +102,9 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (err != 0) goto failure; - err = ip_route_newports(&rt, IPPROTO_DCCP, inet->inet_sport, - inet->inet_dport, sk); + err = ip_route_newports(&rt, IPPROTO_DCCP, + orig_sport, orig_dport, + inet->inet_sport, inet->inet_dport, sk); if (err != 0) goto failure; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ef5a90b..27a0cc8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -149,6 +149,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct inet_sock *inet = inet_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; + __be16 orig_sport, orig_dport; struct rtable *rt; __be32 daddr, nexthop; int tmp; @@ -167,10 +168,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) nexthop = inet->opt->faddr; } + orig_sport = inet->inet_sport; + orig_dport = usin->sin_port; tmp = ip_route_connect(&rt, nexthop, 
inet->inet_saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_TCP, - inet->inet_sport, usin->sin_port, sk, 1); + orig_sport, orig_dport, sk, 1); if (tmp < 0) { if (tmp == -ENETUNREACH) IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); @@ -234,6 +237,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) goto failure; err = ip_route_newports(&rt, IPPROTO_TCP, + orig_sport, orig_dport, inet->inet_sport, inet->inet_dport, sk); if (err) goto failure; -- cgit v1.1 From b552f7e3a9524abcbcdf86f0a99b2be58e55a9c6 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sat, 19 Feb 2011 17:32:28 +0800 Subject: ipvs: unify the formula to estimate the overhead of processing connections lc and wlc use the same formula, but lblc and lblcr use another one. There is no reason for using two different formulas for the lc variants. The formula used by lc is used by all the lc variants in this patch. Signed-off-by: Changli Gao Acked-by: Wensong Zhang Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_lblc.c | 13 +++---------- net/netfilter/ipvs/ip_vs_lblcr.c | 25 +++++++------------------ net/netfilter/ipvs/ip_vs_lc.c | 18 +----------------- net/netfilter/ipvs/ip_vs_wlc.c | 20 ++------------------ 4 files changed, 13 insertions(+), 63 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 4a9c8cd..6bf7a80 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -389,12 +389,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc) int loh, doh; /* - * We think the overhead of processing active connections is fifty - * times higher than that of inactive connections in average. (This - * fifty times might not be accurate, we will change it later.) 
We - * use the following formula to estimate the overhead: - * dest->activeconns*50 + dest->inactconns - * and the load: + * We use the following formula to estimate the load: * (dest overhead) / dest->weight * * Remember -- no floats in kernel mode!!! @@ -410,8 +405,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc) continue; if (atomic_read(&dest->weight) > 0) { least = dest; - loh = atomic_read(&least->activeconns) * 50 - + atomic_read(&least->inactconns); + loh = ip_vs_dest_conn_overhead(least); goto nextstage; } } @@ -425,8 +419,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc) if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; - doh = atomic_read(&dest->activeconns) * 50 - + atomic_read(&dest->inactconns); + doh = ip_vs_dest_conn_overhead(dest); if (loh * atomic_read(&dest->weight) > doh * atomic_read(&least->weight)) { least = dest; diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index bd329b1..0063176 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -178,8 +178,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) if ((atomic_read(&least->weight) > 0) && (least->flags & IP_VS_DEST_F_AVAILABLE)) { - loh = atomic_read(&least->activeconns) * 50 - + atomic_read(&least->inactconns); + loh = ip_vs_dest_conn_overhead(least); goto nextstage; } } @@ -192,8 +191,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; - doh = atomic_read(&dest->activeconns) * 50 - + atomic_read(&dest->inactconns); + doh = ip_vs_dest_conn_overhead(dest); if ((loh * atomic_read(&dest->weight) > doh * atomic_read(&least->weight)) && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { @@ -228,8 +226,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) list_for_each_entry(e, &set->list, list) { most = e->dest; if (atomic_read(&most->weight) > 0) { - moh = 
atomic_read(&most->activeconns) * 50 - + atomic_read(&most->inactconns); + moh = ip_vs_dest_conn_overhead(most); goto nextstage; } } @@ -239,8 +236,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) nextstage: list_for_each_entry(e, &set->list, list) { dest = e->dest; - doh = atomic_read(&dest->activeconns) * 50 - + atomic_read(&dest->inactconns); + doh = ip_vs_dest_conn_overhead(dest); /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */ if ((moh * atomic_read(&dest->weight) < doh * atomic_read(&most->weight)) @@ -563,12 +559,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc) int loh, doh; /* - * We think the overhead of processing active connections is fifty - * times higher than that of inactive connections in average. (This - * fifty times might not be accurate, we will change it later.) We - * use the following formula to estimate the overhead: - * dest->activeconns*50 + dest->inactconns - * and the load: + * We use the following formula to estimate the load: * (dest overhead) / dest->weight * * Remember -- no floats in kernel mode!!! 
@@ -585,8 +576,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc) if (atomic_read(&dest->weight) > 0) { least = dest; - loh = atomic_read(&least->activeconns) * 50 - + atomic_read(&least->inactconns); + loh = ip_vs_dest_conn_overhead(least); goto nextstage; } } @@ -600,8 +590,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc) if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; - doh = atomic_read(&dest->activeconns) * 50 - + atomic_read(&dest->inactconns); + doh = ip_vs_dest_conn_overhead(dest); if (loh * atomic_read(&dest->weight) > doh * atomic_read(&least->weight)) { least = dest; diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c index 6063800..f391819 100644 --- a/net/netfilter/ipvs/ip_vs_lc.c +++ b/net/netfilter/ipvs/ip_vs_lc.c @@ -22,22 +22,6 @@ #include - -static inline unsigned int -ip_vs_lc_dest_overhead(struct ip_vs_dest *dest) -{ - /* - * We think the overhead of processing active connections is 256 - * times higher than that of inactive connections in average. 
(This - * 256 times might not be accurate, we will change it later) We - * use the following formula to estimate the overhead now: - * dest->activeconns*256 + dest->inactconns - */ - return (atomic_read(&dest->activeconns) << 8) + - atomic_read(&dest->inactconns); -} - - /* * Least Connection scheduling */ @@ -62,7 +46,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) if ((dest->flags & IP_VS_DEST_F_OVERLOAD) || atomic_read(&dest->weight) == 0) continue; - doh = ip_vs_lc_dest_overhead(dest); + doh = ip_vs_dest_conn_overhead(dest); if (!least || doh < loh) { least = dest; loh = doh; diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index fdf0f58..bc1bfc4 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -27,22 +27,6 @@ #include - -static inline unsigned int -ip_vs_wlc_dest_overhead(struct ip_vs_dest *dest) -{ - /* - * We think the overhead of processing active connections is 256 - * times higher than that of inactive connections in average. 
(This - * 256 times might not be accurate, we will change it later) We - * use the following formula to estimate the overhead now: - * dest->activeconns*256 + dest->inactconns - */ - return (atomic_read(&dest->activeconns) << 8) + - atomic_read(&dest->inactconns); -} - - /* * Weighted Least Connection scheduling */ @@ -71,7 +55,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && atomic_read(&dest->weight) > 0) { least = dest; - loh = ip_vs_wlc_dest_overhead(least); + loh = ip_vs_dest_conn_overhead(least); goto nextstage; } } @@ -85,7 +69,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) list_for_each_entry_continue(dest, &svc->destinations, n_list) { if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; - doh = ip_vs_wlc_dest_overhead(dest); + doh = ip_vs_dest_conn_overhead(dest); if (loh * atomic_read(&dest->weight) > doh * atomic_read(&least->weight)) { least = dest; -- cgit v1.1 From 861d7f745f37506bbd90227e97b95baf2a5fac34 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 23 Feb 2011 13:04:17 +0000 Subject: netem: cleanup dump code Use nla_put_nested to update netlink attribute value. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/sched/sch_netem.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 64f0d32..d367783 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -562,8 +562,7 @@ static void netem_destroy(struct Qdisc *sch) static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) { const struct netem_sched_data *q = qdisc_priv(sch); - unsigned char *b = skb_tail_pointer(skb); - struct nlattr *nla = (struct nlattr *) b; + struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb); struct tc_netem_qopt qopt; struct tc_netem_corr cor; struct tc_netem_reorder reorder; @@ -590,12 +589,10 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) corrupt.correlation = q->corrupt_cor.rho; NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt); - nla->nla_len = skb_tail_pointer(skb) - b; - - return skb->len; + return nla_nest_end(skb, nla); nla_put_failure: - nlmsg_trim(skb, b); + nlmsg_trim(skb, nla); return -1; } -- cgit v1.1 From 6373a9a286bdd955a76924cee88a2f8f784988b1 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 23 Feb 2011 13:04:18 +0000 Subject: netem: use vmalloc for distribution table The netem probability table can be large (up to 64K bytes) which may be too large to allocate in one contiguous chunk. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index d367783..86dad1e 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -308,6 +308,16 @@ static void netem_reset(struct Qdisc *sch) qdisc_watchdog_cancel(&q->watchdog); } +static void dist_free(struct disttable *d) +{ + if (d) { + if (is_vmalloc_addr(d)) + vfree(d); + else + kfree(d); + } +} + /* * Distribution data is a variable size payload containing * signed 16 bit values. 
@@ -315,16 +325,20 @@ static void netem_reset(struct Qdisc *sch) static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) { struct netem_sched_data *q = qdisc_priv(sch); - unsigned long n = nla_len(attr)/sizeof(__s16); + size_t n = nla_len(attr)/sizeof(__s16); const __s16 *data = nla_data(attr); spinlock_t *root_lock; struct disttable *d; int i; + size_t s; if (n > 65536) return -EINVAL; - d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL); + s = sizeof(struct disttable) + n * sizeof(s16); + d = kmalloc(s, GFP_KERNEL); + if (!d) + d = vmalloc(s); if (!d) return -ENOMEM; @@ -335,7 +349,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) root_lock = qdisc_root_sleeping_lock(sch); spin_lock_bh(root_lock); - kfree(q->delay_dist); + dist_free(q->delay_dist); q->delay_dist = d; spin_unlock_bh(root_lock); return 0; @@ -556,7 +570,7 @@ static void netem_destroy(struct Qdisc *sch) qdisc_watchdog_cancel(&q->watchdog); qdisc_destroy(q->qdisc); - kfree(q->delay_dist); + dist_free(q->delay_dist); } static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) -- cgit v1.1 From df173bda2639ac744ccf596ec1f8f7e66fe4c343 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 23 Feb 2011 13:04:19 +0000 Subject: netem: define NETEM_DIST_MAX Rather than magic constant in code, expose the maximum size of packet distribution table in API. In iproute2, q_netem defines MAX_DIST as 16K already. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/sched/sch_netem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 86dad1e..289febd 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -332,7 +332,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) int i; size_t s; - if (n > 65536) + if (n > NETEM_DIST_MAX) return -EINVAL; s = sizeof(struct disttable) + n * sizeof(s16); -- cgit v1.1 From 10f6dfcfde884441db89dc66b945d6c948e1d356 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 23 Feb 2011 13:04:20 +0000 Subject: Revert "sch_netem: Remove classful functionality" Many users have wanted the old functionality that was lost to be able to use pfifo as inner qdisc for netem. The reason that netem could not be classful with the older API was because of the limitations of the old dequeue/requeue interface; now that qdisc API has a peek function, there is no longer a problem with using any inner qdisc's. This reverts commit 02201464119334690fe209849843881b8e9cfa9f. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/sched/sch_netem.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 79 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 289febd..f176890 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -238,14 +238,15 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = NET_XMIT_SUCCESS; } - if (likely(ret == NET_XMIT_SUCCESS)) { - sch->q.qlen++; - } else if (net_xmit_drop_count(ret)) { - sch->qstats.drops++; + if (ret != NET_XMIT_SUCCESS) { + if (net_xmit_drop_count(ret)) { + sch->qstats.drops++; + return ret; + } } - pr_debug("netem: enqueue ret %d\n", ret); - return ret; + sch->q.qlen++; + return NET_XMIT_SUCCESS; } static unsigned int netem_drop(struct Qdisc *sch) @@ -287,9 +288,10 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) if (G_TC_FROM(skb->tc_verd) & AT_INGRESS) skb->tstamp.tv64 = 0; #endif - pr_debug("netem_dequeue: return skb=%p\n", skb); - qdisc_bstats_update(sch, skb); + sch->q.qlen--; + qdisc_unthrottled(sch); + qdisc_bstats_update(sch, skb); return skb; } @@ -610,8 +612,77 @@ nla_put_failure: return -1; } +static int netem_dump_class(struct Qdisc *sch, unsigned long cl, + struct sk_buff *skb, struct tcmsg *tcm) +{ + struct netem_sched_data *q = qdisc_priv(sch); + + if (cl != 1) /* only one class */ + return -ENOENT; + + tcm->tcm_handle |= TC_H_MIN(1); + tcm->tcm_info = q->qdisc->handle; + + return 0; +} + +static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, + struct Qdisc **old) +{ + struct netem_sched_data *q = qdisc_priv(sch); + + if (new == NULL) + new = &noop_qdisc; + + sch_tree_lock(sch); + *old = q->qdisc; + q->qdisc = new; + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); + qdisc_reset(*old); + sch_tree_unlock(sch); + + return 0; +} + +static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg) +{ + struct netem_sched_data *q = qdisc_priv(sch); + return q->qdisc; +} + 
+static unsigned long netem_get(struct Qdisc *sch, u32 classid) +{ + return 1; +} + +static void netem_put(struct Qdisc *sch, unsigned long arg) +{ +} + +static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker) +{ + if (!walker->stop) { + if (walker->count >= walker->skip) + if (walker->fn(sch, 1, walker) < 0) { + walker->stop = 1; + return; + } + walker->count++; + } +} + +static const struct Qdisc_class_ops netem_class_ops = { + .graft = netem_graft, + .leaf = netem_leaf, + .get = netem_get, + .put = netem_put, + .walk = netem_walk, + .dump = netem_dump_class, +}; + static struct Qdisc_ops netem_qdisc_ops __read_mostly = { .id = "netem", + .cl_ops = &netem_class_ops, .priv_size = sizeof(struct netem_sched_data), .enqueue = netem_enqueue, .dequeue = netem_dequeue, -- cgit v1.1 From 661b79725fea030803a89a16cda506bac8eeca78 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 23 Feb 2011 13:04:21 +0000 Subject: netem: revised correlated loss generator This is a patch originated with Stefano Salsano and Fabio Ludovici. It provides several alternative loss models for use with netem. This patch adds two state machine based loss models. See: http://netgroup.uniroma2.it/twiki/bin/view.cgi/Main/NetemCLG Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 274 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 270 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index f176890..5bbcccc 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -47,6 +47,20 @@ layering other disciplines. It does not need to do bandwidth control either since that can be handled by using token bucket or other rate control. + + Correlated Loss Generator models + + Added generation of correlated loss according to the + "Gilbert-Elliot" model, a 4-state markov model. + + References: + [1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG + [2] S. 
Salsano, F. Ludovici, A. Ordine, "Definition of a general + and intuitive loss model for packet networks and its implementation + in the Netem module in the Linux kernel", available in [1] + + Authors: Stefano Salsano */ struct netem_sched_data { @@ -73,6 +87,26 @@ struct netem_sched_data { u32 size; s16 table[0]; } *delay_dist; + + enum { + CLG_RANDOM, + CLG_4_STATES, + CLG_GILB_ELL, + } loss_model; + + /* Correlated Loss Generation models */ + struct clgstate { + /* state of the Markov chain */ + u8 state; + + /* 4-states and Gilbert-Elliot models */ + u32 a1; /* p13 for 4-states or p for GE */ + u32 a2; /* p31 for 4-states or r for GE */ + u32 a3; /* p32 for 4-states or h for GE */ + u32 a4; /* p14 for 4-states or 1-k for GE */ + u32 a5; /* p23 used only in 4-states */ + } clg; + }; /* Time stamp put into socket buffer control block */ @@ -115,6 +149,122 @@ static u32 get_crandom(struct crndstate *state) return answer; } +/* loss_4state - 4-state model loss generator + * Generates losses according to the 4-state Markov chain adopted in + * the GI (General and Intuitive) loss model. + */ +static bool loss_4state(struct netem_sched_data *q) +{ + struct clgstate *clg = &q->clg; + u32 rnd = net_random(); + + /* + * Makes a comparision between rnd and the transition + * probabilities outgoing from the current state, then decides the + * next state and if the next packet has to be transmitted or lost. 
+ * The four states correspond to: + * 1 => successfully transmitted packets within a gap period + * 4 => isolated losses within a gap period + * 3 => lost packets within a burst period + * 2 => successfully transmitted packets within a burst period + */ + switch (clg->state) { + case 1: + if (rnd < clg->a4) { + clg->state = 4; + return true; + } else if (clg->a4 < rnd && rnd < clg->a1) { + clg->state = 3; + return true; + } else if (clg->a1 < rnd) + clg->state = 1; + + break; + case 2: + if (rnd < clg->a5) { + clg->state = 3; + return true; + } else + clg->state = 2; + + break; + case 3: + if (rnd < clg->a3) + clg->state = 2; + else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) { + clg->state = 1; + return true; + } else if (clg->a2 + clg->a3 < rnd) { + clg->state = 3; + return true; + } + break; + case 4: + clg->state = 1; + break; + } + + return false; +} + +/* loss_gilb_ell - Gilbert-Elliot model loss generator + * Generates losses according to the Gilbert-Elliot loss model or + * its special cases (Gilbert or Simple Gilbert) + * + * Makes a comparision between random number and the transition + * probabilities outgoing from the current state, then decides the + * next state. A second random number is extracted and the comparision + * with the loss probability of the current state decides if the next + * packet will be transmitted or lost. 
+ */ +static bool loss_gilb_ell(struct netem_sched_data *q) +{ + struct clgstate *clg = &q->clg; + + switch (clg->state) { + case 1: + if (net_random() < clg->a1) + clg->state = 2; + if (net_random() < clg->a4) + return true; + case 2: + if (net_random() < clg->a2) + clg->state = 1; + if (clg->a3 > net_random()) + return true; + } + + return false; +} + +static bool loss_event(struct netem_sched_data *q) +{ + switch (q->loss_model) { + case CLG_RANDOM: + /* Random packet drop 0 => none, ~0 => all */ + return q->loss && q->loss >= get_crandom(&q->loss_cor); + + case CLG_4_STATES: + /* 4state loss model algorithm (used also for GI model) + * Extracts a value from the markov 4 state loss generator, + * if it is 1 drops a packet and if needed writes the event in + * the kernel logs + */ + return loss_4state(q); + + case CLG_GILB_ELL: + /* Gilbert-Elliot loss model algorithm + * Extracts a value from the Gilbert-Elliot loss generator, + * if it is 1 drops a packet and if needed writes the event in + * the kernel logs + */ + return loss_gilb_ell(q); + } + + return false; /* not reached */ +} + + /* tabledist - return a pseudo-randomly distributed value with mean mu and * std deviation sigma. Uses table lookup to approximate the desired * distribution, and a uniformly-distributed pseudo-random source. @@ -167,8 +317,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) ++count; - /* Random packet drop 0 => none, ~0 => all */ - if (q->loss && q->loss >= get_crandom(&q->loss_cor)) + /* Drop packet? 
*/ + if (loss_event(q)) --count; if (count == 0) { @@ -385,10 +535,66 @@ static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr) init_crandom(&q->corrupt_cor, r->correlation); } +static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr) +{ + struct netem_sched_data *q = qdisc_priv(sch); + const struct nlattr *la; + int rem; + + nla_for_each_nested(la, attr, rem) { + u16 type = nla_type(la); + + switch(type) { + case NETEM_LOSS_GI: { + const struct tc_netem_gimodel *gi = nla_data(la); + + if (nla_len(la) != sizeof(struct tc_netem_gimodel)) { + pr_info("netem: incorrect gi model size\n"); + return -EINVAL; + } + + q->loss_model = CLG_4_STATES; + + q->clg.state = 1; + q->clg.a1 = gi->p13; + q->clg.a2 = gi->p31; + q->clg.a3 = gi->p32; + q->clg.a4 = gi->p14; + q->clg.a5 = gi->p23; + break; + } + + case NETEM_LOSS_GE: { + const struct tc_netem_gemodel *ge = nla_data(la); + + if (nla_len(la) != sizeof(struct tc_netem_gemodel)) { + pr_info("netem: incorrect gi model size\n"); + return -EINVAL; + } + + q->loss_model = CLG_GILB_ELL; + q->clg.state = 1; + q->clg.a1 = ge->p; + q->clg.a2 = ge->r; + q->clg.a3 = ge->h; + q->clg.a4 = ge->k1; + break; + } + + default: + pr_info("netem: unknown loss type %u\n", type); + return -EINVAL; + } + } + + return 0; +} + static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) }, [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) }, [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) }, + [TCA_NETEM_LOSS] = { .type = NLA_NESTED }, }; static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, @@ -396,11 +602,15 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, { int nested_len = nla_len(nla) - NLA_ALIGN(len); - if (nested_len < 0) + if (nested_len < 0) { + pr_info("netem: invalid attributes len %d\n", nested_len); return -EINVAL; + } + if (nested_len >= nla_attr_size(0)) return 
nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), nested_len, policy); + memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); return 0; } @@ -456,7 +666,11 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_NETEM_CORRUPT]) get_corrupt(sch, tb[TCA_NETEM_CORRUPT]); - return 0; + q->loss_model = CLG_RANDOM; + if (tb[TCA_NETEM_LOSS]) + ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]); + + return ret; } /* @@ -551,6 +765,7 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt) qdisc_watchdog_init(&q->watchdog, sch); + q->loss_model = CLG_RANDOM; q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops, TC_H_MAKE(sch->handle, 1)); if (!q->qdisc) { @@ -575,6 +790,54 @@ static void netem_destroy(struct Qdisc *sch) dist_free(q->delay_dist); } +static int dump_loss_model(const struct netem_sched_data *q, + struct sk_buff *skb) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, TCA_NETEM_LOSS); + if (nest == NULL) + goto nla_put_failure; + + switch (q->loss_model) { + case CLG_RANDOM: + /* legacy loss model */ + nla_nest_cancel(skb, nest); + return 0; /* no data */ + + case CLG_4_STATES: { + struct tc_netem_gimodel gi = { + .p13 = q->clg.a1, + .p31 = q->clg.a2, + .p32 = q->clg.a3, + .p14 = q->clg.a4, + .p23 = q->clg.a5, + }; + + NLA_PUT(skb, NETEM_LOSS_GI, sizeof(gi), &gi); + break; + } + case CLG_GILB_ELL: { + struct tc_netem_gemodel ge = { + .p = q->clg.a1, + .r = q->clg.a2, + .h = q->clg.a3, + .k1 = q->clg.a4, + }; + + NLA_PUT(skb, NETEM_LOSS_GE, sizeof(ge), &ge); + break; + } + } + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -1; +} + static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) { const struct netem_sched_data *q = qdisc_priv(sch); @@ -605,6 +868,9 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) corrupt.correlation = q->corrupt_cor.rho; NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt); + if (dump_loss_model(q, skb) != 0) + goto 
nla_put_failure; + return nla_nest_end(skb, nla); nla_put_failure: -- cgit v1.1 From 250a65f78265940ac33a2dd2002924e6126efe14 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 23 Feb 2011 13:04:22 +0000 Subject: netem: update version and cleanup Get rid of debug message that are not useful, and enable the log messages in case of error. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 5bbcccc..28b3f7e 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -24,7 +24,7 @@ #include #include -#define VERSION "1.2" +#define VERSION "1.3" /* Network Emulation Queuing algorithm. ==================================== @@ -311,8 +311,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) int ret; int count = 1; - pr_debug("netem_enqueue skb=%p\n", skb); - /* Random duplication */ if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) ++count; @@ -633,7 +631,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) ret = fifo_set_limit(q->qdisc, qopt->limit); if (ret) { - pr_debug("netem: can't set fifo limit\n"); + pr_info("netem: can't set fifo limit\n"); return ret; } @@ -769,13 +767,13 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt) q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops, TC_H_MAKE(sch->handle, 1)); if (!q->qdisc) { - pr_debug("netem: qdisc create failed\n"); + pr_notice("netem: qdisc create tfifo qdisc failed\n"); return -ENOMEM; } ret = netem_change(sch, opt); if (ret) { - pr_debug("netem: change failed\n"); + pr_info("netem: change failed\n"); qdisc_destroy(q->qdisc); } return ret; -- cgit v1.1 From 26f70e1202b3c66c4f63b8b25e0419dd0b3a91e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 24 Feb 2011 17:45:41 +0000 Subject: sch_choke: add choke_skb_cb Better document choke skb->cb[] use, 
like we did in netem and sfb This adds a compile time check to make sure we don't exhaust skb->cb[] space. Signed-off-by: Eric Dumazet CC: Stephen Hemminger CC: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_choke.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index ee1e209..06afbae 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -219,14 +219,25 @@ static bool choke_match_flow(struct sk_buff *skb1, return *ports1 == *ports2; } +struct choke_skb_cb { + u16 classid; +}; + +static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(skb->cb) < + sizeof(struct qdisc_skb_cb) + sizeof(struct choke_skb_cb)); + return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data; +} + static inline void choke_set_classid(struct sk_buff *skb, u16 classid) { - *(unsigned int *)(qdisc_skb_cb(skb)->data) = classid; + choke_skb_cb(skb)->classid = classid; } static u16 choke_get_classid(const struct sk_buff *skb) { - return *(unsigned int *)(qdisc_skb_cb(skb)->data); + return choke_skb_cb(skb)->classid; } /* -- cgit v1.1 From 78776d3f2b2b6d59e32cdaf3f30228a0d9d0b720 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Feb 2011 22:48:13 -0800 Subject: sch_netem: Need to include vmalloc.h Signed-off-by: David S. 
Miller --- net/sched/sch_netem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 28b3f7e..edbbf7a 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include -- cgit v1.1 From 96241544ca34721d601925850868188d6304cc0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Thu, 24 Feb 2011 23:14:56 +0000 Subject: Phonet: allow multiple listen() and fix small race condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/socket.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 25f746d..ceb5143 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -428,19 +428,19 @@ static int pn_socket_listen(struct socket *sock, int backlog) struct sock *sk = sock->sk; int err = 0; - if (sock->state != SS_UNCONNECTED) - return -EINVAL; if (pn_socket_autobind(sock)) return -ENOBUFS; lock_sock(sk); - if (sk->sk_state != TCP_CLOSE) { + if (sock->state != SS_UNCONNECTED) { err = -EINVAL; goto out; } - sk->sk_state = TCP_LISTEN; - sk->sk_ack_backlog = 0; + if (sk->sk_state != TCP_LISTEN) { + sk->sk_state = TCP_LISTEN; + sk->sk_ack_backlog = 0; + } sk->sk_max_ack_backlog = backlog; out: release_sock(sk); -- cgit v1.1 From a8059512b120362b15424f152b2548fe8b11bd0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Thu, 24 Feb 2011 23:14:57 +0000 Subject: Phonet: implement per-socket destination/peer address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. 
Miller --- net/phonet/af_phonet.c | 19 ++++++++++++++----- net/phonet/socket.c | 4 ++-- 2 files changed, 16 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 1072b2c..30cc676 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -110,6 +110,7 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol, sk->sk_protocol = protocol; pn = pn_sk(sk); pn->sobject = 0; + pn->dobject = 0; pn->resource = 0; sk->sk_prot->init(sk); err = 0; @@ -242,8 +243,18 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb, struct net_device *dev; struct pn_sock *pn = pn_sk(sk); int err; - u16 src; - u8 daddr = pn_sockaddr_get_addr(target), saddr = PN_NO_ADDR; + u16 src, dst; + u8 daddr, saddr, res; + + src = pn->sobject; + if (target != NULL) { + dst = pn_sockaddr_get_object(target); + res = pn_sockaddr_get_resource(target); + } else { + dst = pn->dobject; + res = pn->resource; + } + daddr = pn_addr(dst); err = -EHOSTUNREACH; if (sk->sk_bound_dev_if) @@ -271,12 +282,10 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb, if (saddr == PN_NO_ADDR) goto drop; - src = pn->sobject; if (!pn_addr(src)) src = pn_object(saddr, pn_obj(src)); - err = pn_send(skb, dev, pn_sockaddr_get_object(target), - src, pn_sockaddr_get_resource(target), 0); + err = pn_send(skb, dev, dst, src, res, 0); dev_put(dev); return err; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index ceb5143..65a0333 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -633,8 +633,8 @@ static int pn_sock_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%2d %04X:%04X:%02X %02X %08X:%08X %5d %lu " "%d %p %d%n", - sk->sk_protocol, pn->sobject, 0, pn->resource, - sk->sk_state, + sk->sk_protocol, pn->sobject, pn->dobject, + pn->resource, sk->sk_state, sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk), sock_i_uid(sk), sock_i_ino(sk), atomic_read(&sk->sk_refcnt), sk, -- cgit v1.1 From 
14ba8faebcc241e4d60a4ef4a7d3fdef1c2e846f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Thu, 24 Feb 2011 23:14:58 +0000 Subject: Phonet: use socket destination in pipe protocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pep.c | 41 ++++++++++++++++------------------------- 1 file changed, 16 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 3e60f2e..4fce882 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -50,11 +50,6 @@ #define CREDITS_MAX 10 #define CREDITS_THR 7 -static const struct sockaddr_pn pipe_srv = { - .spn_family = AF_PHONET, - .spn_resource = 0xD9, /* pipe service */ -}; - #define pep_sb_size(s) (((s) + 5) & ~3) /* 2-bytes head, 32-bits aligned */ /* Get the next TLV sub-block. */ @@ -88,6 +83,7 @@ static int pep_reply(struct sock *sk, struct sk_buff *oskb, const struct pnpipehdr *oph = pnp_hdr(oskb); struct pnpipehdr *ph; struct sk_buff *skb; + struct sockaddr_pn peer; skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority); if (!skb) @@ -105,7 +101,8 @@ static int pep_reply(struct sock *sk, struct sk_buff *oskb, ph->pipe_handle = oph->pipe_handle; ph->error_code = code; - return pn_skb_send(sk, skb, &pipe_srv); + pn_skb_get_src_sockaddr(oskb, &peer); + return pn_skb_send(sk, skb, &peer); } #define PAD 0x00 @@ -220,7 +217,7 @@ static int pipe_handler_send_req(struct sock *sk, u8 utid, ph->pipe_handle = pn->pipe_handle; ph->error_code = PN_PIPE_NO_ERROR; - return pn_skb_send(sk, skb, &pn->remote_pep); + return pn_skb_send(sk, skb, NULL); } static int pipe_handler_send_created_ind(struct sock *sk, @@ -262,7 +259,7 @@ static int pipe_handler_send_created_ind(struct sock *sk, ph->pipe_handle = pn->pipe_handle; ph->error_code = err_code; - return pn_skb_send(sk, skb, &pn->remote_pep); + return pn_skb_send(sk, skb, NULL); } static int 
pipe_handler_send_ind(struct sock *sk, u8 utid, u8 msg_id) @@ -295,7 +292,7 @@ static int pipe_handler_send_ind(struct sock *sk, u8 utid, u8 msg_id) ph->pipe_handle = pn->pipe_handle; ph->error_code = err_code; - return pn_skb_send(sk, skb, &pn->remote_pep); + return pn_skb_send(sk, skb, NULL); } static int pipe_handler_enable_pipe(struct sock *sk, int enable) @@ -396,11 +393,7 @@ static int pipe_snd_status(struct sock *sk, u8 type, u8 status, gfp_t priority) ph->data[3] = PAD; ph->data[4] = status; -#ifdef CONFIG_PHONET_PIPECTRLR - return pn_skb_send(sk, skb, &pn->remote_pep); -#else - return pn_skb_send(sk, skb, &pipe_srv); -#endif + return pn_skb_send(sk, skb, NULL); } /* Send our RX flow control information to the sender. @@ -722,7 +715,7 @@ static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) struct sock *newsk; struct pep_sock *newpn, *pn = pep_sk(sk); struct pnpipehdr *hdr; - struct sockaddr_pn dst; + struct sockaddr_pn dst, src; u16 peer_type; u8 pipe_handle, enabled, n_sb; u8 aligned = 0; @@ -789,8 +782,10 @@ static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) newpn = pep_sk(newsk); pn_skb_get_dst_sockaddr(skb, &dst); + pn_skb_get_src_sockaddr(skb, &src); newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst); - newpn->pn_sk.resource = pn->pn_sk.resource; + newpn->pn_sk.dobject = pn_sockaddr_get_object(&src); + newpn->pn_sk.resource = pn_sockaddr_get_resource(&dst); skb_queue_head_init(&newpn->ctrlreq_queue); newpn->pipe_handle = pipe_handle; atomic_set(&newpn->tx_credits, 0); @@ -925,7 +920,7 @@ static int pipe_do_remove(struct sock *sk) ph->pipe_handle = pn->pipe_handle; ph->data[0] = PAD; - return pn_skb_send(sk, skb, &pipe_srv); + return pn_skb_send(sk, skb, NULL); } /* associated socket ceases to exist */ @@ -1042,10 +1037,10 @@ out: static int pep_sock_connect(struct sock *sk, struct sockaddr *addr, int len) { struct pep_sock *pn = pep_sk(sk); - struct sockaddr_pn *spn = (struct sockaddr_pn *)addr; - - memcpy(&pn->remote_pep, 
spn, sizeof(struct sockaddr_pn)); + const struct sockaddr_pn *spn = (struct sockaddr_pn *)addr; + pn->pn_sk.dobject = pn_sockaddr_get_object(spn); + pn->pn_sk.resource = pn_sockaddr_get_resource(spn); return pipe_handler_send_req(sk, PNS_PEP_CONNECT_UTID, PNS_PEP_CONNECT_REQ, GFP_ATOMIC); @@ -1222,11 +1217,7 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb) } else ph->message_id = PNS_PIPE_DATA; ph->pipe_handle = pn->pipe_handle; -#ifdef CONFIG_PHONET_PIPECTRLR - err = pn_skb_send(sk, skb, &pn->remote_pep); -#else - err = pn_skb_send(sk, skb, &pipe_srv); -#endif + err = pn_skb_send(sk, skb, NULL); if (err && pn_flow_safe(pn->tx_fc)) atomic_inc(&pn->tx_credits); -- cgit v1.1 From 2feb61816f7f0be57f4bc61137555e9a8cb4f322 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Thu, 24 Feb 2011 23:14:59 +0000 Subject: Phonet: remove redundant pep->pipe_state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sk->sk_state already contains the pipe state. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. 
Miller --- net/phonet/pep.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 4fce882..15775a7 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -527,7 +527,6 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_PHONET_PIPECTRLR case PNS_PEP_DISCONNECT_RESP: - pn->pipe_state = PIPE_IDLE; sk->sk_state = TCP_CLOSE; break; #endif @@ -539,7 +538,6 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_PHONET_PIPECTRLR case PNS_PEP_ENABLE_RESP: - pn->pipe_state = PIPE_ENABLED; pipe_handler_send_ind(sk, PNS_PIPE_ENABLED_IND_UTID, PNS_PIPE_ENABLED_IND); @@ -574,7 +572,6 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_PHONET_PIPECTRLR case PNS_PEP_DISABLE_RESP: - pn->pipe_state = PIPE_DISABLED; atomic_set(&pn->tx_credits, 0); pipe_handler_send_ind(sk, PNS_PIPE_DISABLED_IND_UTID, PNS_PIPE_DISABLED_IND); @@ -692,7 +689,6 @@ static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb) remote_pref_rx_fc, sizeof(host_pref_rx_fc)); - pn->pipe_state = PIPE_DISABLED; sk->sk_state = TCP_SYN_RECV; sk->sk_backlog_rcv = pipe_do_rcv; sk->sk_destruct = pipe_destruct; @@ -941,21 +937,18 @@ static void pep_sock_close(struct sock *sk, long timeout) sk_for_each_safe(sknode, p, n, &pn->ackq) sk_del_node_init(sknode); sk->sk_state = TCP_CLOSE; - } else if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED)) + } else if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED)) { +#ifndef CONFIG_PHONET_PIPECTRLR /* Forcefully remove dangling Phonet pipe */ pipe_do_remove(sk); - -#ifdef CONFIG_PHONET_PIPECTRLR - if (pn->pipe_state != PIPE_IDLE) { +#else /* send pep disconnect request */ pipe_handler_send_req(sk, PNS_PEP_DISCONNECT_UTID, PNS_PEP_DISCONNECT_REQ, GFP_KERNEL); - - pn->pipe_state = PIPE_IDLE; sk->sk_state = TCP_CLOSE; - } #endif + } ifindex = pn->ifindex; pn->ifindex = 0; @@ -1101,10 +1094,6 
@@ static int pep_setsockopt(struct sock *sk, int level, int optname, #ifdef CONFIG_PHONET_PIPECTRLR case PNPIPE_PIPE_HANDLE: if (val) { - if (pn->pipe_state > PIPE_IDLE) { - err = -EFAULT; - break; - } pn->pipe_handle = val; break; } @@ -1138,7 +1127,7 @@ static int pep_setsockopt(struct sock *sk, int level, int optname, #ifdef CONFIG_PHONET_PIPECTRLR case PNPIPE_ENABLE: - if (pn->pipe_state <= PIPE_IDLE) { + if ((1 << sk->sk_state) & ~(TCPF_SYN_RECV|TCPF_ESTABLISHED)) { err = -ENOTCONN; break; } @@ -1177,9 +1166,7 @@ static int pep_getsockopt(struct sock *sk, int level, int optname, #ifdef CONFIG_PHONET_PIPECTRLR case PNPIPE_ENABLE: - if (pn->pipe_state <= PIPE_IDLE) - return -ENOTCONN; - val = pn->pipe_state != PIPE_DISABLED; + val = sk->sk_state == TCP_ESTABLISHED; break; #endif -- cgit v1.1 From 0165d69bcb18c5aa220538389c872852243f9725 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Thu, 24 Feb 2011 23:15:00 +0000 Subject: Phonet: don't bother with transaction IDs (especially for indications) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. 
Miller --- net/phonet/pep.c | 49 ++++++++++++++----------------------------------- 1 file changed, 14 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 15775a7..0ecab59 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -172,8 +172,7 @@ static int pipe_get_flow_info(struct sock *sk, struct sk_buff *skb, return 0; } -static int pipe_handler_send_req(struct sock *sk, u8 utid, - u8 msg_id, gfp_t priority) +static int pipe_handler_send_req(struct sock *sk, u8 msg_id, gfp_t priority) { int len; struct pnpipehdr *ph; @@ -212,7 +211,7 @@ static int pipe_handler_send_req(struct sock *sk, u8 utid, __skb_push(skb, sizeof(*ph)); skb_reset_transport_header(skb); ph = pnp_hdr(skb); - ph->utid = utid; + ph->utid = msg_id; /* whatever */ ph->message_id = msg_id; ph->pipe_handle = pn->pipe_handle; ph->error_code = PN_PIPE_NO_ERROR; @@ -220,8 +219,7 @@ static int pipe_handler_send_req(struct sock *sk, u8 utid, return pn_skb_send(sk, skb, NULL); } -static int pipe_handler_send_created_ind(struct sock *sk, - u8 utid, u8 msg_id) +static int pipe_handler_send_created_ind(struct sock *sk, u8 msg_id) { int err_code; struct pnpipehdr *ph; @@ -254,7 +252,7 @@ static int pipe_handler_send_created_ind(struct sock *sk, __skb_push(skb, sizeof(*ph)); skb_reset_transport_header(skb); ph = pnp_hdr(skb); - ph->utid = utid; + ph->utid = 0; ph->message_id = msg_id; ph->pipe_handle = pn->pipe_handle; ph->error_code = err_code; @@ -262,7 +260,7 @@ static int pipe_handler_send_created_ind(struct sock *sk, return pn_skb_send(sk, skb, NULL); } -static int pipe_handler_send_ind(struct sock *sk, u8 utid, u8 msg_id) +static int pipe_handler_send_ind(struct sock *sk, u8 msg_id) { int err_code; struct pnpipehdr *ph; @@ -287,7 +285,7 @@ static int pipe_handler_send_ind(struct sock *sk, u8 utid, u8 msg_id) __skb_push(skb, sizeof(*ph)); skb_reset_transport_header(skb); ph = pnp_hdr(skb); - ph->utid = utid; + ph->utid = 0; ph->message_id = msg_id; 
ph->pipe_handle = pn->pipe_handle; ph->error_code = err_code; @@ -297,16 +295,9 @@ static int pipe_handler_send_ind(struct sock *sk, u8 utid, u8 msg_id) static int pipe_handler_enable_pipe(struct sock *sk, int enable) { - int utid, req; - - if (enable) { - utid = PNS_PIPE_ENABLE_UTID; - req = PNS_PEP_ENABLE_REQ; - } else { - utid = PNS_PIPE_DISABLE_UTID; - req = PNS_PEP_DISABLE_REQ; - } - return pipe_handler_send_req(sk, utid, req, GFP_ATOMIC); + u8 id = enable ? PNS_PEP_ENABLE_REQ : PNS_PEP_DISABLE_REQ; + + return pipe_handler_send_req(sk, id, GFP_KERNEL); } #endif @@ -538,8 +529,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_PHONET_PIPECTRLR case PNS_PEP_ENABLE_RESP: - pipe_handler_send_ind(sk, PNS_PIPE_ENABLED_IND_UTID, - PNS_PIPE_ENABLED_IND); + pipe_handler_send_ind(sk, PNS_PIPE_ENABLED_IND); if (!pn_flow_safe(pn->tx_fc)) { atomic_set(&pn->tx_credits, 1); @@ -573,8 +563,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_PHONET_PIPECTRLR case PNS_PEP_DISABLE_RESP: atomic_set(&pn->tx_credits, 0); - pipe_handler_send_ind(sk, PNS_PIPE_DISABLED_IND_UTID, - PNS_PIPE_DISABLED_IND); + pipe_handler_send_ind(sk, PNS_PIPE_DISABLED_IND); sk->sk_state = TCP_SYN_RECV; pn->rx_credits = 0; break; @@ -678,7 +667,6 @@ static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb) u8 host_pref_rx_fc[3] = {3, 2, 1}, host_req_tx_fc[3] = {3, 2, 1}; u8 remote_pref_rx_fc[3], remote_req_tx_fc[3]; u8 negotiated_rx_fc, negotiated_tx_fc; - int ret; pipe_get_flow_info(sk, skb, remote_pref_rx_fc, remote_req_tx_fc); @@ -697,12 +685,7 @@ static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb) pn->tx_fc = negotiated_tx_fc; sk->sk_state_change(sk); - ret = pipe_handler_send_created_ind(sk, - PNS_PIPE_CREATED_IND_UTID, - PNS_PIPE_CREATED_IND - ); - - return ret; + return pipe_handler_send_created_ind(sk, PNS_PIPE_CREATED_IND); } #endif @@ -943,9 +926,7 @@ static void pep_sock_close(struct sock *sk, long timeout) 
pipe_do_remove(sk); #else /* send pep disconnect request */ - pipe_handler_send_req(sk, - PNS_PEP_DISCONNECT_UTID, PNS_PEP_DISCONNECT_REQ, - GFP_KERNEL); + pipe_handler_send_req(sk, PNS_PEP_DISCONNECT_REQ, GFP_KERNEL); sk->sk_state = TCP_CLOSE; #endif } @@ -1034,9 +1015,7 @@ static int pep_sock_connect(struct sock *sk, struct sockaddr *addr, int len) pn->pn_sk.dobject = pn_sockaddr_get_object(spn); pn->pn_sk.resource = pn_sockaddr_get_resource(spn); - return pipe_handler_send_req(sk, - PNS_PEP_CONNECT_UTID, PNS_PEP_CONNECT_REQ, - GFP_ATOMIC); + return pipe_handler_send_req(sk, PNS_PEP_CONNECT_REQ, GFP_KERNEL); } #endif -- cgit v1.1 From 8f44fcc72a454c5eb7cbc138bd53f0963f23e87f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Thu, 24 Feb 2011 23:15:01 +0000 Subject: Phonet: fix flawed "SYN/ACK" logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Do not fail if the peer supports more or less than 3 algorithms. * Ignore unknown congestion control algorithms instead of failing. * Simplify congestion algorithm negotiation (largest is best). * Do not use a static buffer. * Fix off-by-two read overflow. * Avoid extra memory copy (in addition to skb_copy_bits()). The previous code really made no sense. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. 
Miller --- net/phonet/pep.c | 125 +++++++++++++++++++++---------------------------------- 1 file changed, 47 insertions(+), 78 deletions(-) (limited to 'net') diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 0ecab59..b8c31fc 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -108,70 +108,6 @@ static int pep_reply(struct sock *sk, struct sk_buff *oskb, #define PAD 0x00 #ifdef CONFIG_PHONET_PIPECTRLR -static u8 pipe_negotiate_fc(u8 *host_fc, u8 *remote_fc, int len) -{ - int i, j; - u8 base_fc, final_fc; - - for (i = 0; i < len; i++) { - base_fc = host_fc[i]; - for (j = 0; j < len; j++) { - if (remote_fc[j] == base_fc) { - final_fc = base_fc; - goto done; - } - } - } - return -EINVAL; - -done: - return final_fc; - -} - -static int pipe_get_flow_info(struct sock *sk, struct sk_buff *skb, - u8 *pref_rx_fc, u8 *req_tx_fc) -{ - struct pnpipehdr *hdr; - u8 n_sb; - - if (!pskb_may_pull(skb, sizeof(*hdr) + 4)) - return -EINVAL; - - hdr = pnp_hdr(skb); - n_sb = hdr->data[4]; - - __skb_pull(skb, sizeof(*hdr) + 4); - while (n_sb > 0) { - u8 type, buf[3], len = sizeof(buf); - u8 *data = pep_get_sb(skb, &type, &len, buf); - - if (data == NULL) - return -EINVAL; - - switch (type) { - case PN_PIPE_SB_REQUIRED_FC_TX: - if (len < 3 || (data[2] | data[3] | data[4]) > 3) - break; - req_tx_fc[0] = data[2]; - req_tx_fc[1] = data[3]; - req_tx_fc[2] = data[4]; - break; - - case PN_PIPE_SB_PREFERRED_FC_RX: - if (len < 3 || (data[2] | data[3] | data[4]) > 3) - break; - pref_rx_fc[0] = data[2]; - pref_rx_fc[1] = data[3]; - pref_rx_fc[2] = data[4]; - break; - - } - n_sb--; - } - return 0; -} - static int pipe_handler_send_req(struct sock *sk, u8 msg_id, gfp_t priority) { int len; @@ -661,28 +597,61 @@ static void pipe_destruct(struct sock *sk) } #ifdef CONFIG_PHONET_PIPECTRLR +static u8 pipe_negotiate_fc(const u8 *fcs, unsigned n) +{ + unsigned i; + u8 final_fc = PN_NO_FLOW_CONTROL; + + for (i = 0; i < n; i++) { + u8 fc = fcs[i]; + + if (fc > final_fc && fc < PN_MAX_FLOW_CONTROL) + 
final_fc = fc; + } + return final_fc; +} + static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb) { struct pep_sock *pn = pep_sk(sk); - u8 host_pref_rx_fc[3] = {3, 2, 1}, host_req_tx_fc[3] = {3, 2, 1}; - u8 remote_pref_rx_fc[3], remote_req_tx_fc[3]; - u8 negotiated_rx_fc, negotiated_tx_fc; - - pipe_get_flow_info(sk, skb, remote_pref_rx_fc, - remote_req_tx_fc); - negotiated_tx_fc = pipe_negotiate_fc(remote_req_tx_fc, - host_pref_rx_fc, - sizeof(host_pref_rx_fc)); - negotiated_rx_fc = pipe_negotiate_fc(host_req_tx_fc, - remote_pref_rx_fc, - sizeof(host_pref_rx_fc)); + struct pnpipehdr *hdr; + u8 n_sb; + + if (!pskb_pull(skb, sizeof(*hdr) + 4)) + return -EINVAL; + + hdr = pnp_hdr(skb); + + /* Parse sub-blocks */ + n_sb = hdr->data[4]; + while (n_sb > 0) { + u8 type, buf[6], len = sizeof(buf); + const u8 *data = pep_get_sb(skb, &type, &len, buf); + + if (data == NULL) + return -EINVAL; + + switch (type) { + case PN_PIPE_SB_REQUIRED_FC_TX: + if (len < 2 || len < data[0]) + break; + pn->tx_fc = pipe_negotiate_fc(data + 2, len - 2); + break; + + case PN_PIPE_SB_PREFERRED_FC_RX: + if (len < 2 || len < data[0]) + break; + pn->rx_fc = pipe_negotiate_fc(data + 2, len - 2); + break; + + } + n_sb--; + } sk->sk_state = TCP_SYN_RECV; sk->sk_backlog_rcv = pipe_do_rcv; sk->sk_destruct = pipe_destruct; pn->rx_credits = 0; - pn->rx_fc = negotiated_rx_fc; - pn->tx_fc = negotiated_tx_fc; sk->sk_state_change(sk); return pipe_handler_send_created_ind(sk, PNS_PIPE_CREATED_IND); -- cgit v1.1 From 004971353a403d75e7d50f8b3b304272ef056248 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 25 Feb 2011 11:23:22 -0800 Subject: phonet: Protect pipe_do_remove() with appropriate ifdefs. It is only used when CONFIG_PHONET_PIPECTRLR is not set. Signed-off-by: David S. 
Miller --- net/phonet/pep.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/phonet/pep.c b/net/phonet/pep.c index b8c31fc..875e86c 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -849,6 +849,7 @@ drop: return err; } +#ifndef CONFIG_PHONET_PIPECTRLR static int pipe_do_remove(struct sock *sk) { struct pep_sock *pn = pep_sk(sk); @@ -870,6 +871,7 @@ static int pipe_do_remove(struct sock *sk) return pn_skb_send(sk, skb, NULL); } +#endif /* associated socket ceases to exist */ static void pep_sock_close(struct sock *sk, long timeout) -- cgit v1.1 From 7bb4568372856688bc070917265bce0b88bb7d4d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 24 Feb 2011 14:42:06 +0100 Subject: mac80211: make tx() operation return void The return value of the tx operation is commonly misused by drivers, leading to errors. All drivers will drop frames if they fail to TX the frame, and they must also properly manage the queues (if they didn't, mac80211 would already warn). Removing the ability for drivers to return a BUSY value also allows significant cleanups of the TX handling code in mac80211. Note that this also fixes a bug in ath9k_htc, the old "return -1" there was wrong. Signed-off-by: Johannes Berg Tested-by: Sedat Dilek [ath5k] Acked-by: Gertjan van Wingerde [rt2x00] Acked-by: Larry Finger [b43, rtl8187, rtlwifi] Acked-by: Luciano Coelho [wl12xx] Signed-off-by: John W. 
Linville --- net/mac80211/driver-ops.h | 4 +- net/mac80211/tx.c | 164 ++++++++++++++++------------------------------ 2 files changed, 58 insertions(+), 110 deletions(-) (limited to 'net') diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 78af32d..32f05c1 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -5,9 +5,9 @@ #include "ieee80211_i.h" #include "driver-trace.h" -static inline int drv_tx(struct ieee80211_local *local, struct sk_buff *skb) +static inline void drv_tx(struct ieee80211_local *local, struct sk_buff *skb) { - return local->ops->tx(&local->hw, skb); + local->ops->tx(&local->hw, skb); } static inline int drv_start(struct ieee80211_local *local) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 34edf7f..081dcaf 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -33,10 +33,6 @@ #include "wme.h" #include "rate.h" -#define IEEE80211_TX_OK 0 -#define IEEE80211_TX_AGAIN 1 -#define IEEE80211_TX_PENDING 2 - /* misc utils */ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, @@ -1285,16 +1281,17 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, return TX_CONTINUE; } -static int __ieee80211_tx(struct ieee80211_local *local, - struct sk_buff **skbp, - struct sta_info *sta, - bool txpending) +/* + * Returns false if the frame couldn't be transmitted but was queued instead. 
+ */ +static bool __ieee80211_tx(struct ieee80211_local *local, struct sk_buff **skbp, + struct sta_info *sta, bool txpending) { struct sk_buff *skb = *skbp, *next; struct ieee80211_tx_info *info; struct ieee80211_sub_if_data *sdata; unsigned long flags; - int ret, len; + int len; bool fragm = false; while (skb) { @@ -1302,13 +1299,37 @@ static int __ieee80211_tx(struct ieee80211_local *local, __le16 fc; spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - ret = IEEE80211_TX_OK; if (local->queue_stop_reasons[q] || - (!txpending && !skb_queue_empty(&local->pending[q]))) - ret = IEEE80211_TX_PENDING; + (!txpending && !skb_queue_empty(&local->pending[q]))) { + /* + * Since queue is stopped, queue up frames for later + * transmission from the tx-pending tasklet when the + * queue is woken again. + */ + + do { + next = skb->next; + skb->next = NULL; + /* + * NB: If txpending is true, next must already + * be NULL since we must've gone through this + * loop before already; therefore we can just + * queue the frame to the head without worrying + * about reordering of fragments. 
+ */ + if (unlikely(txpending)) + __skb_queue_head(&local->pending[q], + skb); + else + __skb_queue_tail(&local->pending[q], + skb); + } while ((skb = next)); + + spin_unlock_irqrestore(&local->queue_stop_reason_lock, + flags); + return false; + } spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); - if (ret != IEEE80211_TX_OK) - return ret; info = IEEE80211_SKB_CB(skb); @@ -1343,15 +1364,7 @@ static int __ieee80211_tx(struct ieee80211_local *local, info->control.sta = NULL; fc = ((struct ieee80211_hdr *)skb->data)->frame_control; - ret = drv_tx(local, skb); - if (WARN_ON(ret != NETDEV_TX_OK && skb->len != len)) { - dev_kfree_skb(skb); - ret = NETDEV_TX_OK; - } - if (ret != NETDEV_TX_OK) { - info->control.vif = &sdata->vif; - return IEEE80211_TX_AGAIN; - } + drv_tx(local, skb); ieee80211_tpt_led_trig_tx(local, fc, len); *skbp = skb = next; @@ -1359,7 +1372,7 @@ static int __ieee80211_tx(struct ieee80211_local *local, fragm = true; } - return IEEE80211_TX_OK; + return true; } /* @@ -1419,23 +1432,24 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) return 0; } -static void ieee80211_tx(struct ieee80211_sub_if_data *sdata, +/* + * Returns false if the frame couldn't be transmitted but was queued instead. 
+ */ +static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, bool txpending) { struct ieee80211_local *local = sdata->local; struct ieee80211_tx_data tx; ieee80211_tx_result res_prepare; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct sk_buff *next; - unsigned long flags; - int ret, retries; u16 queue; + bool result = true; queue = skb_get_queue_mapping(skb); if (unlikely(skb->len < 10)) { dev_kfree_skb(skb); - return; + return true; } rcu_read_lock(); @@ -1445,85 +1459,19 @@ static void ieee80211_tx(struct ieee80211_sub_if_data *sdata, if (unlikely(res_prepare == TX_DROP)) { dev_kfree_skb(skb); - rcu_read_unlock(); - return; + goto out; } else if (unlikely(res_prepare == TX_QUEUED)) { - rcu_read_unlock(); - return; + goto out; } tx.channel = local->hw.conf.channel; info->band = tx.channel->band; - if (invoke_tx_handlers(&tx)) - goto out; - - retries = 0; - retry: - ret = __ieee80211_tx(local, &tx.skb, tx.sta, txpending); - switch (ret) { - case IEEE80211_TX_OK: - break; - case IEEE80211_TX_AGAIN: - /* - * Since there are no fragmented frames on A-MPDU - * queues, there's no reason for a driver to reject - * a frame there, warn and drop it. 
- */ - if (WARN_ON(info->flags & IEEE80211_TX_CTL_AMPDU)) - goto drop; - /* fall through */ - case IEEE80211_TX_PENDING: - skb = tx.skb; - - spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - - if (local->queue_stop_reasons[queue] || - !skb_queue_empty(&local->pending[queue])) { - /* - * if queue is stopped, queue up frames for later - * transmission from the tasklet - */ - do { - next = skb->next; - skb->next = NULL; - if (unlikely(txpending)) - __skb_queue_head(&local->pending[queue], - skb); - else - __skb_queue_tail(&local->pending[queue], - skb); - } while ((skb = next)); - - spin_unlock_irqrestore(&local->queue_stop_reason_lock, - flags); - } else { - /* - * otherwise retry, but this is a race condition or - * a driver bug (which we warn about if it persists) - */ - spin_unlock_irqrestore(&local->queue_stop_reason_lock, - flags); - - retries++; - if (WARN(retries > 10, "tx refused but queue active\n")) - goto drop; - goto retry; - } - } + if (!invoke_tx_handlers(&tx)) + result = __ieee80211_tx(local, &tx.skb, tx.sta, txpending); out: rcu_read_unlock(); - return; - - drop: - rcu_read_unlock(); - - skb = tx.skb; - while (skb) { - next = skb->next; - dev_kfree_skb(skb); - skb = next; - } + return result; } /* device xmit handlers */ @@ -2070,6 +2018,11 @@ void ieee80211_clear_tx_pending(struct ieee80211_local *local) skb_queue_purge(&local->pending[i]); } +/* + * Returns false if the frame couldn't be transmitted but was queued instead, + * which in this case means re-queued -- take as an indication to stop sending + * more pending frames. 
+ */ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local, struct sk_buff *skb) { @@ -2077,20 +2030,17 @@ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata; struct sta_info *sta; struct ieee80211_hdr *hdr; - int ret; - bool result = true; + bool result; sdata = vif_to_sdata(info->control.vif); if (info->flags & IEEE80211_TX_INTFL_NEED_TXPROCESSING) { - ieee80211_tx(sdata, skb, true); + result = ieee80211_tx(sdata, skb, true); } else { hdr = (struct ieee80211_hdr *)skb->data; sta = sta_info_get(sdata, hdr->addr1); - ret = __ieee80211_tx(local, &skb, sta, true); - if (ret != IEEE80211_TX_OK) - result = false; + result = __ieee80211_tx(local, &skb, sta, true); } return result; @@ -2132,8 +2082,6 @@ void ieee80211_tx_pending(unsigned long data) flags); txok = ieee80211_tx_pending_skb(local, skb); - if (!txok) - __skb_queue_head(&local->pending[i], skb); spin_lock_irqsave(&local->queue_stop_reason_lock, flags); if (!txok) -- cgit v1.1 From 2973773775ec05d18e4b942a28604120cb15bbf2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 24 Feb 2011 14:46:13 +0100 Subject: mac80211: remove IBSS merge delay This reverts 4a332a38 ("mac80211: Give it some time to do the TSF sync"). There's no point in waiting with a new IBSS merge just because the hardware hasn't merged up with the old IBSS yet, and since 34e8f082 we no longer attempt to merge with the IBSS we're already in. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/ibss.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 463271f..3e81af1 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -31,7 +31,6 @@ #define IEEE80211_IBSS_JOIN_TIMEOUT (7 * HZ) #define IEEE80211_IBSS_MERGE_INTERVAL (30 * HZ) -#define IEEE80211_IBSS_MERGE_DELAY 0x400000 #define IEEE80211_IBSS_INACTIVITY_LIMIT (60 * HZ) #define IEEE80211_IBSS_MAX_STA_ENTRIES 128 @@ -397,10 +396,6 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, jiffies); #endif - /* give slow hardware some time to do the TSF sync */ - if (rx_timestamp < IEEE80211_IBSS_MERGE_DELAY) - goto put_bss; - if (beacon_timestamp > rx_timestamp) { #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG "%s: beacon TSF higher than " -- cgit v1.1 From 90b4ca9dba87bef9a3352c3d5bcab998be70fc4f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Feb 2011 12:24:10 +0100 Subject: mac80211: copy peer MCS TX parameters We need to copy this to allow drivers to look at the information where needed. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ht.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 75d679d..b9e4b9b 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -66,6 +66,9 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband, /* own MCS TX capabilities */ tx_mcs_set_cap = sband->ht_cap.mcs.tx_params; + /* Copy peer MCS TX capabilities, the driver might need them. */ + ht_cap->mcs.tx_params = ht_cap_ie->mcs.tx_params; + /* can we TX with MCS rates? 
*/ if (!(tx_mcs_set_cap & IEEE80211_HT_MCS_TX_DEFINED)) return; @@ -79,7 +82,7 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband, max_tx_streams = IEEE80211_HT_MCS_TX_MAX_STREAMS; /* - * 802.11n D5.0 20.3.5 / 20.6 says: + * 802.11n-2009 20.3.5 / 20.6 says: * - indices 0 to 7 and 32 are single spatial stream * - 8 to 31 are multiple spatial streams using equal modulation * [8..15 for two streams, 16..23 for three and 24..31 for four] -- cgit v1.1 From 8628172f45c839376bf2b70bbd326d56e68dadc3 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 25 Feb 2011 14:46:02 +0100 Subject: mac80211: better fix for conn_mon_timer running after disassociate It is still possible to schedule conn_mon_timer after disassociate from ieee80211_sta_tx_notify() and ieee80211_offchannel_ps_disable(). Move disassociate check to ieee80211_sta_reset_conn_monitor() to cover all these cases, and add unlikely since most of the time we call ieee80211_sta_reset_conn_monitor() when associated. Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index abb0116..cc984bd 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -145,6 +145,9 @@ void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + if (unlikely(!sdata->u.mgd.associated)) + return; + if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) return; @@ -1083,12 +1086,6 @@ void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, if (is_multicast_ether_addr(hdr->addr1)) return; - /* - * In case we receive frames after disassociation. 
- */ - if (!sdata->u.mgd.associated) - return; - ieee80211_sta_reset_conn_monitor(sdata); } -- cgit v1.1 From 5f16a43617d46cf255a66f4dc193a7f5b2540aaf Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Feb 2011 15:36:57 +0100 Subject: mac80211: support direct offchannel TX offload For devices supported by iwlwifi sometimes off-channel transmissions need to be handled by the device completely. To support this mac80211 needs to pass the frame directly to the driver and not through the TX path as the driver needs the frame and channel information at the same time. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 39 +++++++++++++++++++++++++++++++++++++++ net/mac80211/driver-ops.h | 31 +++++++++++++++++++++++++++++++ net/mac80211/driver-trace.h | 33 +++++++++++++++++++++++++++++++++ net/mac80211/ieee80211_i.h | 1 + net/mac80211/status.c | 4 ++++ 5 files changed, 108 insertions(+) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 140503d..8b436c7 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1800,6 +1800,33 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, *cookie = (unsigned long) skb; + if (is_offchan && local->ops->offchannel_tx) { + int ret; + + IEEE80211_SKB_CB(skb)->band = chan->band; + + mutex_lock(&local->mtx); + + if (local->hw_offchan_tx_cookie) { + mutex_unlock(&local->mtx); + return -EBUSY; + } + + /* TODO: bitrate control, TX processing? */ + ret = drv_offchannel_tx(local, skb, chan, channel_type, wait); + + if (ret == 0) + local->hw_offchan_tx_cookie = *cookie; + mutex_unlock(&local->mtx); + + /* + * Allow driver to return 1 to indicate it wants to have the + * frame transmitted with a remain_on_channel + regular TX. 
+ */ + if (ret != 1) + return ret; + } + if (is_offchan && local->ops->remain_on_channel) { unsigned int duration; int ret; @@ -1886,6 +1913,18 @@ static int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy, mutex_lock(&local->mtx); + if (local->ops->offchannel_tx_cancel_wait && + local->hw_offchan_tx_cookie == cookie) { + ret = drv_offchannel_tx_cancel_wait(local); + + if (!ret) + local->hw_offchan_tx_cookie = 0; + + mutex_unlock(&local->mtx); + + return ret; + } + if (local->ops->cancel_remain_on_channel) { cookie ^= 2; ret = ieee80211_cancel_remain_on_channel_hw(local, cookie); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 32f05c1..3729296 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -495,4 +495,35 @@ static inline int drv_cancel_remain_on_channel(struct ieee80211_local *local) return ret; } +static inline int drv_offchannel_tx(struct ieee80211_local *local, + struct sk_buff *skb, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + unsigned int wait) +{ + int ret; + + might_sleep(); + + trace_drv_offchannel_tx(local, skb, chan, channel_type, wait); + ret = local->ops->offchannel_tx(&local->hw, skb, chan, + channel_type, wait); + trace_drv_return_int(local, ret); + + return ret; +} + +static inline int drv_offchannel_tx_cancel_wait(struct ieee80211_local *local) +{ + int ret; + + might_sleep(); + + trace_drv_offchannel_tx_cancel_wait(local); + ret = local->ops->offchannel_tx_cancel_wait(&local->hw); + trace_drv_return_int(local, ret); + + return ret; +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index e5cce19..520fe24 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -884,6 +884,39 @@ DEFINE_EVENT(local_only_evt, drv_cancel_remain_on_channel, TP_ARGS(local) ); +TRACE_EVENT(drv_offchannel_tx, + TP_PROTO(struct ieee80211_local *local, struct sk_buff *skb, + struct ieee80211_channel *chan, + 
enum nl80211_channel_type channel_type, + unsigned int wait), + + TP_ARGS(local, skb, chan, channel_type, wait), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(int, center_freq) + __field(int, channel_type) + __field(unsigned int, wait) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->center_freq = chan->center_freq; + __entry->channel_type = channel_type; + __entry->wait = wait; + ), + + TP_printk( + LOCAL_PR_FMT " freq:%dMHz, wait:%dms", + LOCAL_PR_ARG, __entry->center_freq, __entry->wait + ) +); + +DEFINE_EVENT(local_only_evt, drv_offchannel_tx_cancel_wait, + TP_PROTO(struct ieee80211_local *local), + TP_ARGS(local) +); + /* * Tracing for API calls that drivers call. */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 0a570a1..a404017 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -957,6 +957,7 @@ struct ieee80211_local { unsigned int hw_roc_duration; u32 hw_roc_cookie; bool hw_roc_for_tx; + unsigned long hw_offchan_tx_cookie; /* dummy netdev for use w/ NAPI */ struct net_device napi_dev; diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 8651851..b936dd2 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -341,6 +341,10 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) cookie = local->hw_roc_cookie ^ 2; local->hw_roc_skb_for_status = NULL; } + + if (cookie == local->hw_offchan_tx_cookie) + local->hw_offchan_tx_cookie = 0; + cfg80211_mgmt_tx_status( skb->dev, cookie, skb->data, skb->len, !!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC); -- cgit v1.1 From 3b193ade594e4f2d501d4c3a9f43d49176f03230 Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Fri, 25 Feb 2011 05:45:16 +0000 Subject: dccp: newdp is declared/assigned but never be used Declaration and assignment of newdp is removed. Usage of dccp_sk() exhibit no side effects. Signed-off-by: Hagen Paul Pfeifer Signed-off-by: David S. 
Miller --- net/dccp/ipv6.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index dca711d..460d545 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -484,7 +484,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, struct inet6_request_sock *ireq6 = inet6_rsk(req); struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct inet_sock *newinet; - struct dccp_sock *newdp; struct dccp6_sock *newdp6; struct sock *newsk; struct ipv6_txoptions *opt; @@ -498,7 +497,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, return NULL; newdp6 = (struct dccp6_sock *)newsk; - newdp = dccp_sk(newsk); newinet = inet_sk(newsk); newinet->pinet6 = &newdp6->inet6; newnp = inet6_sk(newsk); @@ -578,7 +576,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, newdp6 = (struct dccp6_sock *)newsk; newinet = inet_sk(newsk); newinet->pinet6 = &newdp6->inet6; - newdp = dccp_sk(newsk); newnp = inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); -- cgit v1.1 From a5f5e3689c8682e06ba155676d69ccf3f4172cb4 Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Fri, 25 Feb 2011 05:45:17 +0000 Subject: ipv6: totlen is declared and assigned but not used Signed-off-by: Hagen Paul Pfeifer Signed-off-by: David S. 
Miller --- net/ipv6/ip6_output.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 2600e22..25a2647 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -274,13 +274,10 @@ int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev, { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6hdr *hdr; - int totlen; skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; - totlen = len + sizeof(struct ipv6hdr); - skb_reset_network_header(skb); skb_put(skb, sizeof(struct ipv6hdr)); hdr = ipv6_hdr(skb); -- cgit v1.1 From 96d796a38e9ec9a7c04a6cda3fc15d79efebb008 Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Fri, 25 Feb 2011 05:45:18 +0000 Subject: ipv6: hash is calculated but not used afterwards hash is declared and assigned but not used anymore. ipv6_addr_hash() exhibits no side-effects. Signed-off-by: Hagen Paul Pfeifer Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index fd6782e..3daaf3c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -718,12 +718,9 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) struct inet6_ifaddr *ifa, *ifn; struct inet6_dev *idev = ifp->idev; int state; - int hash; int deleted = 0, onlink = 0; unsigned long expires = jiffies; - hash = ipv6_addr_hash(&ifp->addr); - spin_lock_bh(&ifp->state_lock); state = ifp->state; ifp->state = INET6_IFADDR_STATE_DEAD; -- cgit v1.1 From e9476e95d8707d1567d1af60df2c1f19630219a3 Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Fri, 25 Feb 2011 05:45:19 +0000 Subject: ipv6: variable next is never used in this function Signed-off-by: Hagen Paul Pfeifer Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f786aed..7e9443f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1075,11 +1075,9 @@ out: int icmp6_dst_gc(void) { - struct dst_entry *dst, *next, **pprev; + struct dst_entry *dst, **pprev; int more = 0; - next = NULL; - spin_lock_bh(&icmp6_dst_lock); pprev = &icmp6_dst_gc_list; -- cgit v1.1 From ddc3731fcb712646e4a0f8e6117af6a153e9d36f Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Fri, 25 Feb 2011 05:45:20 +0000 Subject: ipv6: ignore rtnl_unicast() return code rtnl_unicast() return value is not of interest, we can silently ignore it, save some instructions and four bytes on the stack. Signed-off-by: Hagen Paul Pfeifer Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 0e1d53b..618f67cc 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1039,7 +1039,6 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { if (ipv6_hdr(skb)->version == 0) { - int err; struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { @@ -1050,7 +1049,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, skb_trim(skb, nlh->nlmsg_len); ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE; } - err = rtnl_unicast(skb, net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).pid); } else ip6_mr_forward(net, mrt, skb, c); } -- cgit v1.1 From 52bc97470e22e67f11b054e51a31eee100ef6867 Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Fri, 25 Feb 2011 05:45:21 +0000 Subject: sched: protocol only needed when CONFIG_NET_CLS_ACT is enabled Signed-off-by: Hagen Paul Pfeifer Signed-off-by: David 
S. Miller --- net/sched/sch_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 1507415..7490f3f 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1672,12 +1672,12 @@ int tc_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res) { int err = 0; - __be16 protocol; #ifdef CONFIG_NET_CLS_ACT + __be16 protocol; struct tcf_proto *otp = tp; reclassify: -#endif protocol = skb->protocol; +#endif err = tc_classify_compat(skb, tp, res); #ifdef CONFIG_NET_CLS_ACT -- cgit v1.1 From 0ed54dad52e8056f4440da723a4c117f2aef1f68 Mon Sep 17 00:00:00 2001 From: Anand Gadiyar Date: Tue, 22 Feb 2011 12:43:26 +0530 Subject: Bluetooth: remove unnecessary call to hci_sock_cleanup hci_sock_cleanup is already called after the sock_err label. It appears that we can drop this call. Signed-off-by: Anand Gadiyar Signed-off-by: Gustavo F. Padovan --- net/bluetooth/af_bluetooth.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 88af9eb..8add9b4 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -550,10 +550,8 @@ static int __init bt_init(void) goto error; err = l2cap_init(); - if (err < 0) { - hci_sock_cleanup(); + if (err < 0) goto sock_err; - } err = sco_init(); if (err < 0) { -- cgit v1.1 From 50899e8d3a1b0655087838374a51ee5b865961b6 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Tue, 22 Feb 2011 12:30:53 -0300 Subject: Bluetooth: Remove duplicated BT_INFO() from L2CAP The message for the initialization of the L2CAP layer was being printed twice. Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/l2cap_core.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index efcef0d..1db6c90 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4033,8 +4033,6 @@ int __init l2cap_init(void) BT_ERR("Failed to create L2CAP debug file"); } - BT_INFO("L2CAP socket layer initialized"); - return 0; error: -- cgit v1.1 From 4c93fbb0626080d196fb461c859b24a1feec3270 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 25 Feb 2011 18:07:06 -0800 Subject: pfkey: Use const where possible. This actually pointed out a (seemingly known) bug where we mangle the pfkey header in a potentially shared SKB, which is fixed here. Signed-off-by: David S. Miller --- net/key/af_key.c | 201 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 107 insertions(+), 94 deletions(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 5637285..7fb5457 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -70,7 +70,7 @@ static inline struct pfkey_sock *pfkey_sk(struct sock *sk) return (struct pfkey_sock *)sk; } -static int pfkey_can_dump(struct sock *sk) +static int pfkey_can_dump(const struct sock *sk) { if (3 * atomic_read(&sk->sk_rmem_alloc) <= 2 * sk->sk_rcvbuf) return 1; @@ -303,12 +303,13 @@ static int pfkey_do_dump(struct pfkey_sock *pfk) return rc; } -static inline void pfkey_hdr_dup(struct sadb_msg *new, struct sadb_msg *orig) +static inline void pfkey_hdr_dup(struct sadb_msg *new, + const struct sadb_msg *orig) { *new = *orig; } -static int pfkey_error(struct sadb_msg *orig, int err, struct sock *sk) +static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk) { struct sk_buff *skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL); struct sadb_msg *hdr; @@ -369,13 +370,13 @@ static u8 sadb_ext_min_len[] = { }; /* Verify sadb_address_{len,prefixlen} against sa_family. 
*/ -static int verify_address_len(void *p) +static int verify_address_len(const void *p) { - struct sadb_address *sp = p; - struct sockaddr *addr = (struct sockaddr *)(sp + 1); - struct sockaddr_in *sin; + const struct sadb_address *sp = p; + const struct sockaddr *addr = (const struct sockaddr *)(sp + 1); + const struct sockaddr_in *sin; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct sockaddr_in6 *sin6; + const struct sockaddr_in6 *sin6; #endif int len; @@ -411,16 +412,16 @@ static int verify_address_len(void *p) return 0; } -static inline int pfkey_sec_ctx_len(struct sadb_x_sec_ctx *sec_ctx) +static inline int pfkey_sec_ctx_len(const struct sadb_x_sec_ctx *sec_ctx) { return DIV_ROUND_UP(sizeof(struct sadb_x_sec_ctx) + sec_ctx->sadb_x_ctx_len, sizeof(uint64_t)); } -static inline int verify_sec_ctx_len(void *p) +static inline int verify_sec_ctx_len(const void *p) { - struct sadb_x_sec_ctx *sec_ctx = (struct sadb_x_sec_ctx *)p; + const struct sadb_x_sec_ctx *sec_ctx = p; int len = sec_ctx->sadb_x_ctx_len; if (len > PAGE_SIZE) @@ -434,7 +435,7 @@ static inline int verify_sec_ctx_len(void *p) return 0; } -static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(struct sadb_x_sec_ctx *sec_ctx) +static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(const struct sadb_x_sec_ctx *sec_ctx) { struct xfrm_user_sec_ctx *uctx = NULL; int ctx_size = sec_ctx->sadb_x_ctx_len; @@ -455,16 +456,16 @@ static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(struct sadb return uctx; } -static int present_and_same_family(struct sadb_address *src, - struct sadb_address *dst) +static int present_and_same_family(const struct sadb_address *src, + const struct sadb_address *dst) { - struct sockaddr *s_addr, *d_addr; + const struct sockaddr *s_addr, *d_addr; if (!src || !dst) return 0; - s_addr = (struct sockaddr *)(src + 1); - d_addr = (struct sockaddr *)(dst + 1); + s_addr = (const struct sockaddr *)(src + 1); + d_addr = (const struct 
sockaddr *)(dst + 1); if (s_addr->sa_family != d_addr->sa_family) return 0; if (s_addr->sa_family != AF_INET @@ -477,15 +478,15 @@ static int present_and_same_family(struct sadb_address *src, return 1; } -static int parse_exthdrs(struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int parse_exthdrs(struct sk_buff *skb, const struct sadb_msg *hdr, void **ext_hdrs) { - char *p = (char *) hdr; + const char *p = (char *) hdr; int len = skb->len; len -= sizeof(*hdr); p += sizeof(*hdr); while (len > 0) { - struct sadb_ext *ehdr = (struct sadb_ext *) p; + const struct sadb_ext *ehdr = (const struct sadb_ext *) p; uint16_t ext_type; int ext_len; @@ -514,7 +515,7 @@ static int parse_exthdrs(struct sk_buff *skb, struct sadb_msg *hdr, void **ext_h if (verify_sec_ctx_len(p)) return -EINVAL; } - ext_hdrs[ext_type-1] = p; + ext_hdrs[ext_type-1] = (void *) p; } p += ext_len; len -= ext_len; @@ -606,21 +607,21 @@ int pfkey_sockaddr_extract(const struct sockaddr *sa, xfrm_address_t *xaddr) } static -int pfkey_sadb_addr2xfrm_addr(struct sadb_address *addr, xfrm_address_t *xaddr) +int pfkey_sadb_addr2xfrm_addr(const struct sadb_address *addr, xfrm_address_t *xaddr) { return pfkey_sockaddr_extract((struct sockaddr *)(addr + 1), xaddr); } -static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, struct sadb_msg *hdr, void **ext_hdrs) +static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, const struct sadb_msg *hdr, void * const *ext_hdrs) { - struct sadb_sa *sa; - struct sadb_address *addr; + const struct sadb_sa *sa; + const struct sadb_address *addr; uint16_t proto; unsigned short family; xfrm_address_t *xaddr; - sa = (struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1]; + sa = (const struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1]; if (sa == NULL) return NULL; @@ -629,18 +630,18 @@ static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, struct sadb_ return NULL; /* sadb_address_len should be checked by caller */ - addr = (struct sadb_address *) 
ext_hdrs[SADB_EXT_ADDRESS_DST-1]; + addr = (const struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1]; if (addr == NULL) return NULL; - family = ((struct sockaddr *)(addr + 1))->sa_family; + family = ((const struct sockaddr *)(addr + 1))->sa_family; switch (family) { case AF_INET: - xaddr = (xfrm_address_t *)&((struct sockaddr_in *)(addr + 1))->sin_addr; + xaddr = (xfrm_address_t *)&((const struct sockaddr_in *)(addr + 1))->sin_addr; break; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case AF_INET6: - xaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(addr + 1))->sin6_addr; + xaddr = (xfrm_address_t *)&((const struct sockaddr_in6 *)(addr + 1))->sin6_addr; break; #endif default: @@ -691,8 +692,8 @@ static inline int pfkey_mode_to_xfrm(int mode) } static unsigned int pfkey_sockaddr_fill(const xfrm_address_t *xaddr, __be16 port, - struct sockaddr *sa, - unsigned short family) + struct sockaddr *sa, + unsigned short family) { switch (family) { case AF_INET: @@ -720,7 +721,7 @@ static unsigned int pfkey_sockaddr_fill(const xfrm_address_t *xaddr, __be16 port return 0; } -static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x, +static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x, int add_keys, int hsc) { struct sk_buff *skb; @@ -1010,7 +1011,7 @@ static struct sk_buff *__pfkey_xfrm_state2msg(struct xfrm_state *x, } -static inline struct sk_buff *pfkey_xfrm_state2msg(struct xfrm_state *x) +static inline struct sk_buff *pfkey_xfrm_state2msg(const struct xfrm_state *x) { struct sk_buff *skb; @@ -1019,26 +1020,26 @@ static inline struct sk_buff *pfkey_xfrm_state2msg(struct xfrm_state *x) return skb; } -static inline struct sk_buff *pfkey_xfrm_state2msg_expire(struct xfrm_state *x, +static inline struct sk_buff *pfkey_xfrm_state2msg_expire(const struct xfrm_state *x, int hsc) { return __pfkey_xfrm_state2msg(x, 0, hsc); } static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, - struct sadb_msg *hdr, - void **ext_hdrs) + 
const struct sadb_msg *hdr, + void * const *ext_hdrs) { struct xfrm_state *x; - struct sadb_lifetime *lifetime; - struct sadb_sa *sa; - struct sadb_key *key; - struct sadb_x_sec_ctx *sec_ctx; + const struct sadb_lifetime *lifetime; + const struct sadb_sa *sa; + const struct sadb_key *key; + const struct sadb_x_sec_ctx *sec_ctx; uint16_t proto; int err; - sa = (struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1]; + sa = (const struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1]; if (!sa || !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1])) @@ -1077,7 +1078,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, sa->sadb_sa_encrypt > SADB_X_CALG_MAX) || sa->sadb_sa_encrypt > SADB_EALG_MAX) return ERR_PTR(-EINVAL); - key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1]; + key = (const struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1]; if (key != NULL && sa->sadb_sa_auth != SADB_X_AALG_NULL && ((key->sadb_key_bits+7) / 8 == 0 || @@ -1104,14 +1105,14 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, if (sa->sadb_sa_flags & SADB_SAFLAGS_NOPMTUDISC) x->props.flags |= XFRM_STATE_NOPMTUDISC; - lifetime = (struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_HARD-1]; + lifetime = (const struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_HARD-1]; if (lifetime != NULL) { x->lft.hard_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations); x->lft.hard_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes); x->lft.hard_add_expires_seconds = lifetime->sadb_lifetime_addtime; x->lft.hard_use_expires_seconds = lifetime->sadb_lifetime_usetime; } - lifetime = (struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_SOFT-1]; + lifetime = (const struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_SOFT-1]; if (lifetime != NULL) { x->lft.soft_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations); x->lft.soft_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes); @@ -1119,7 +1120,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net 
*net, x->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime; } - sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1]; + sec_ctx = (const struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1]; if (sec_ctx != NULL) { struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); @@ -1133,7 +1134,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, goto out; } - key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1]; + key = (const struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1]; if (sa->sadb_sa_auth) { int keysize = 0; struct xfrm_algo_desc *a = xfrm_aalg_get_byid(sa->sadb_sa_auth); @@ -1202,7 +1203,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, &x->id.daddr); if (ext_hdrs[SADB_X_EXT_SA2-1]) { - struct sadb_x_sa2 *sa2 = (void*)ext_hdrs[SADB_X_EXT_SA2-1]; + const struct sadb_x_sa2 *sa2 = ext_hdrs[SADB_X_EXT_SA2-1]; int mode = pfkey_mode_to_xfrm(sa2->sadb_x_sa2_mode); if (mode < 0) { err = -EINVAL; @@ -1213,7 +1214,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, } if (ext_hdrs[SADB_EXT_ADDRESS_PROXY-1]) { - struct sadb_address *addr = ext_hdrs[SADB_EXT_ADDRESS_PROXY-1]; + const struct sadb_address *addr = ext_hdrs[SADB_EXT_ADDRESS_PROXY-1]; /* Nobody uses this, but we try. 
*/ x->sel.family = pfkey_sadb_addr2xfrm_addr(addr, &x->sel.saddr); @@ -1224,7 +1225,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, x->sel.family = x->props.family; if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) { - struct sadb_x_nat_t_type* n_type; + const struct sadb_x_nat_t_type* n_type; struct xfrm_encap_tmpl *natt; x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL); @@ -1236,12 +1237,12 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, natt->encap_type = n_type->sadb_x_nat_t_type_type; if (ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1]) { - struct sadb_x_nat_t_port* n_port = + const struct sadb_x_nat_t_port *n_port = ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1]; natt->encap_sport = n_port->sadb_x_nat_t_port_port; } if (ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1]) { - struct sadb_x_nat_t_port* n_port = + const struct sadb_x_nat_t_port *n_port = ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1]; natt->encap_dport = n_port->sadb_x_nat_t_port_port; } @@ -1261,12 +1262,12 @@ out: return ERR_PTR(err); } -static int pfkey_reserved(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_reserved(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { return -EOPNOTSUPP; } -static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct sk_buff *resp_skb; @@ -1365,7 +1366,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h return 0; } -static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct xfrm_state *x; @@ -1453,7 +1454,7 @@ static int key_notify_sa(struct xfrm_state *x, const struct 
km_event *c) return 0; } -static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_add(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct xfrm_state *x; @@ -1492,7 +1493,7 @@ out: return err; } -static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_delete(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct xfrm_state *x; @@ -1534,7 +1535,7 @@ out: return err; } -static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_get(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); __u8 proto; @@ -1570,7 +1571,7 @@ static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, return 0; } -static struct sk_buff *compose_sadb_supported(struct sadb_msg *orig, +static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig, gfp_t allocation) { struct sk_buff *skb; @@ -1642,7 +1643,7 @@ out_put_algs: return skb; } -static int pfkey_register(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct pfkey_sock *pfk = pfkey_sk(sk); struct sk_buff *supp_skb; @@ -1671,7 +1672,7 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, struct sadb_msg return 0; } -static int unicast_flush_resp(struct sock *sk, struct sadb_msg *ihdr) +static int unicast_flush_resp(struct sock *sk, const struct sadb_msg *ihdr) { struct sk_buff *skb; struct sadb_msg *hdr; @@ -1710,7 +1711,7 @@ static int key_notify_sa_flush(const struct km_event *c) return 0; } -static int pfkey_flush(struct sock *sk, struct sk_buff 
*skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); unsigned proto; @@ -1784,7 +1785,7 @@ static void pfkey_dump_sa_done(struct pfkey_sock *pfk) xfrm_state_walk_done(&pfk->dump.u.state); } -static int pfkey_dump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { u8 proto; struct pfkey_sock *pfk = pfkey_sk(sk); @@ -1805,19 +1806,29 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr return pfkey_do_dump(pfk); } -static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct pfkey_sock *pfk = pfkey_sk(sk); int satype = hdr->sadb_msg_satype; + bool reset_errno = false; if (hdr->sadb_msg_len == (sizeof(*hdr) / sizeof(uint64_t))) { - /* XXX we mangle packet... 
*/ - hdr->sadb_msg_errno = 0; + reset_errno = true; if (satype != 0 && satype != 1) return -EINVAL; pfk->promisc = satype; } - pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk)); + if (reset_errno && skb_cloned(skb)) + skb = skb_copy(skb, GFP_KERNEL); + else + skb = skb_clone(skb, GFP_KERNEL); + + if (reset_errno && skb) { + struct sadb_msg *new_hdr = (struct sadb_msg *) skb->data; + new_hdr->sadb_msg_errno = 0; + } + + pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk)); return 0; } @@ -1921,7 +1932,7 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol) return 0; } -static inline int pfkey_xfrm_policy2sec_ctx_size(struct xfrm_policy *xp) +static inline int pfkey_xfrm_policy2sec_ctx_size(const struct xfrm_policy *xp) { struct xfrm_sec_ctx *xfrm_ctx = xp->security; @@ -1933,9 +1944,9 @@ static inline int pfkey_xfrm_policy2sec_ctx_size(struct xfrm_policy *xp) return 0; } -static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp) +static int pfkey_xfrm_policy2msg_size(const struct xfrm_policy *xp) { - struct xfrm_tmpl *t; + const struct xfrm_tmpl *t; int sockaddr_size = pfkey_sockaddr_size(xp->family); int socklen = 0; int i; @@ -1955,7 +1966,7 @@ static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp) pfkey_xfrm_policy2sec_ctx_size(xp); } -static struct sk_buff * pfkey_xfrm_policy2msg_prep(struct xfrm_policy *xp) +static struct sk_buff * pfkey_xfrm_policy2msg_prep(const struct xfrm_policy *xp) { struct sk_buff *skb; int size; @@ -1969,7 +1980,7 @@ static struct sk_buff * pfkey_xfrm_policy2msg_prep(struct xfrm_policy *xp) return skb; } -static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, int dir) +static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy *xp, int dir) { struct sadb_msg *hdr; struct sadb_address *addr; @@ -2065,8 +2076,8 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in 
pol->sadb_x_policy_priority = xp->priority; for (i=0; i<xp->xfrm_nr; i++) { + const struct xfrm_tmpl *t = xp->xfrm_vec + i; struct sadb_x_ipsecrequest *rq; - struct xfrm_tmpl *t = xp->xfrm_vec + i; int req_size; int mode; @@ -2152,7 +2163,7 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_ev } -static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); int err = 0; @@ -2273,7 +2284,7 @@ out: return err; } -static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); int err; @@ -2350,7 +2361,7 @@ out: return err; } -static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, struct sadb_msg *hdr, int dir) +static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, const struct sadb_msg *hdr, int dir) { int err; struct sk_buff *out_skb; @@ -2458,7 +2469,7 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len, } static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, - struct sadb_msg *hdr, void **ext_hdrs) + const struct sadb_msg *hdr, void * const *ext_hdrs) { int i, len, ret, err = -EINVAL; u8 dir; @@ -2556,7 +2567,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, #endif -static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); unsigned int dir; @@ -2644,7 +2655,7 @@ static void pfkey_dump_sp_done(struct pfkey_sock *pfk) xfrm_policy_walk_done(&pfk->dump.u.policy); } -static int pfkey_spddump(struct 
sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct pfkey_sock *pfk = pfkey_sk(sk); @@ -2680,7 +2691,7 @@ static int key_notify_policy_flush(const struct km_event *c) } -static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) +static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct km_event c; @@ -2709,7 +2720,7 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg } typedef int (*pfkey_handler)(struct sock *sk, struct sk_buff *skb, - struct sadb_msg *hdr, void **ext_hdrs); + const struct sadb_msg *hdr, void * const *ext_hdrs); static pfkey_handler pfkey_funcs[SADB_MAX + 1] = { [SADB_RESERVED] = pfkey_reserved, [SADB_GETSPI] = pfkey_getspi, @@ -2736,7 +2747,7 @@ static pfkey_handler pfkey_funcs[SADB_MAX + 1] = { [SADB_X_MIGRATE] = pfkey_migrate, }; -static int pfkey_process(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr) +static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr) { void *ext_hdrs[SADB_EXT_MAX]; int err; @@ -2781,7 +2792,8 @@ static struct sadb_msg *pfkey_get_base_msg(struct sk_buff *skb, int *errp) return hdr; } -static inline int aalg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d) +static inline int aalg_tmpl_set(const struct xfrm_tmpl *t, + const struct xfrm_algo_desc *d) { unsigned int id = d->desc.sadb_alg_id; @@ -2791,7 +2803,8 @@ static inline int aalg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d) return (t->aalgos >> id) & 1; } -static inline int ealg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d) +static inline int ealg_tmpl_set(const struct xfrm_tmpl *t, + const struct xfrm_algo_desc *d) { unsigned int id = d->desc.sadb_alg_id; @@ -2801,12 +2814,12 @@ 
static inline int ealg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d) return (t->ealgos >> id) & 1; } -static int count_ah_combs(struct xfrm_tmpl *t) +static int count_ah_combs(const struct xfrm_tmpl *t) { int i, sz = 0; for (i = 0; ; i++) { - struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); + const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); if (!aalg) break; if (aalg_tmpl_set(t, aalg) && aalg->available) @@ -2815,12 +2828,12 @@ static int count_ah_combs(struct xfrm_tmpl *t) return sz + sizeof(struct sadb_prop); } -static int count_esp_combs(struct xfrm_tmpl *t) +static int count_esp_combs(const struct xfrm_tmpl *t) { int i, k, sz = 0; for (i = 0; ; i++) { - struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); + const struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); if (!ealg) break; @@ -2828,7 +2841,7 @@ static int count_esp_combs(struct xfrm_tmpl *t) continue; for (k = 1; ; k++) { - struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k); + const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k); if (!aalg) break; @@ -2839,7 +2852,7 @@ static int count_esp_combs(struct xfrm_tmpl *t) return sz + sizeof(struct sadb_prop); } -static void dump_ah_combs(struct sk_buff *skb, struct xfrm_tmpl *t) +static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) { struct sadb_prop *p; int i; @@ -2851,7 +2864,7 @@ static void dump_ah_combs(struct sk_buff *skb, struct xfrm_tmpl *t) memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved)); for (i = 0; ; i++) { - struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); + const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); if (!aalg) break; @@ -2871,7 +2884,7 @@ static void dump_ah_combs(struct sk_buff *skb, struct xfrm_tmpl *t) } } -static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t) +static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) { struct sadb_prop *p; int i, k; @@ -2883,7 +2896,7 @@ static void dump_esp_combs(struct sk_buff 
*skb, struct xfrm_tmpl *t) memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved)); for (i=0; ; i++) { - struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); + const struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); if (!ealg) break; @@ -2892,7 +2905,7 @@ static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t) for (k = 1; ; k++) { struct sadb_comb *c; - struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k); + const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k); if (!aalg) break; if (!(aalg_tmpl_set(t, aalg) && aalg->available)) -- cgit v1.1 From 779cb85016587d9ffaea681c38691d5301a3fedc Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Fri, 25 Feb 2011 19:05:47 +0100 Subject: Bluetooth: Use proper command structure in remove_uuid The structure used for command was wrong (probably copy-paste mistake). Signed-off-by: Szymon Janc Acked-by: Johan Hedberg Signed-off-by: Gustavo F. Padovan --- net/bluetooth/mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 982becd..4543ede 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -613,7 +613,7 @@ failed: static int remove_uuid(struct sock *sk, unsigned char *data, u16 len) { struct list_head *p, *n; - struct mgmt_cp_add_uuid *cp; + struct mgmt_cp_remove_uuid *cp; struct hci_dev *hdev; u8 bt_uuid_any[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; u16 dev_id; -- cgit v1.1 From 4e51eae9cdda4bf096e73a4ebe23f8f96a17596a Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Fri, 25 Feb 2011 19:05:48 +0100 Subject: Bluetooth: Move index to common header in management interface Most mgmt commands and event are related to hci adapter. Moving index to common header allow to easily use it in command status while reporting errors. For those not related to adapter use MGMT_INDEX_NONE (0xFFFF) as index. Signed-off-by: Szymon Janc Acked-by: Johan Hedberg Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/mgmt.c | 407 +++++++++++++++++++++++---------------------------- 1 file changed, 180 insertions(+), 227 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 4543ede..98c92ae 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -43,7 +43,7 @@ struct pending_cmd { LIST_HEAD(cmd_list); -static int cmd_status(struct sock *sk, u16 cmd, u8 status) +static int cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status) { struct sk_buff *skb; struct mgmt_hdr *hdr; @@ -58,6 +58,7 @@ static int cmd_status(struct sock *sk, u16 cmd, u8 status) hdr = (void *) skb_put(skb, sizeof(*hdr)); hdr->opcode = cpu_to_le16(MGMT_EV_CMD_STATUS); + hdr->index = cpu_to_le16(index); hdr->len = cpu_to_le16(sizeof(*ev)); ev = (void *) skb_put(skb, sizeof(*ev)); @@ -70,7 +71,8 @@ static int cmd_status(struct sock *sk, u16 cmd, u8 status) return 0; } -static int cmd_complete(struct sock *sk, u16 cmd, void *rp, size_t rp_len) +static int cmd_complete(struct sock *sk, u16 index, u16 cmd, void *rp, + size_t rp_len) { struct sk_buff *skb; struct mgmt_hdr *hdr; @@ -85,6 +87,7 @@ static int cmd_complete(struct sock *sk, u16 cmd, void *rp, size_t rp_len) hdr = (void *) skb_put(skb, sizeof(*hdr)); hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); + hdr->index = cpu_to_le16(index); hdr->len = cpu_to_le16(sizeof(*ev) + rp_len); ev = (void *) skb_put(skb, sizeof(*ev) + rp_len); @@ -106,7 +109,8 @@ static int read_version(struct sock *sk) rp.version = MGMT_VERSION; put_unaligned_le16(MGMT_REVISION, &rp.revision); - return cmd_complete(sk, MGMT_OP_READ_VERSION, &rp, sizeof(rp)); + return cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_VERSION, &rp, + sizeof(rp)); } static int read_index_list(struct sock *sk) @@ -152,32 +156,24 @@ static int read_index_list(struct sock *sk) read_unlock(&hci_dev_list_lock); - err = cmd_complete(sk, MGMT_OP_READ_INDEX_LIST, rp, rp_len); + err = cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_INDEX_LIST, 
rp, + rp_len); kfree(rp); return err; } -static int read_controller_info(struct sock *sk, unsigned char *data, u16 len) +static int read_controller_info(struct sock *sk, u16 index) { struct mgmt_rp_read_info rp; - struct mgmt_cp_read_info *cp = (void *) data; struct hci_dev *hdev; - u16 dev_id; - BT_DBG("sock %p", sk); - - if (len != 2) - return cmd_status(sk, MGMT_OP_READ_INFO, EINVAL); - - dev_id = get_unaligned_le16(&cp->index); + BT_DBG("sock %p hci%u", sk, index); - BT_DBG("request for hci%u", dev_id); - - hdev = hci_dev_get(dev_id); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_READ_INFO, ENODEV); + return cmd_status(sk, index, MGMT_OP_READ_INFO, ENODEV); hci_del_off_timer(hdev); @@ -185,7 +181,6 @@ static int read_controller_info(struct sock *sk, unsigned char *data, u16 len) set_bit(HCI_MGMT, &hdev->flags); - put_unaligned_le16(hdev->id, &rp.index); rp.type = hdev->dev_type; rp.powered = test_bit(HCI_UP, &hdev->flags); @@ -210,7 +205,7 @@ static int read_controller_info(struct sock *sk, unsigned char *data, u16 len) hci_dev_unlock_bh(hdev); hci_dev_put(hdev); - return cmd_complete(sk, MGMT_OP_READ_INFO, &rp, sizeof(rp)); + return cmd_complete(sk, index, MGMT_OP_READ_INFO, &rp, sizeof(rp)); } static void mgmt_pending_free(struct pending_cmd *cmd) @@ -296,37 +291,35 @@ static void mgmt_pending_remove(struct pending_cmd *cmd) mgmt_pending_free(cmd); } -static int set_powered(struct sock *sk, unsigned char *data, u16 len) +static int set_powered(struct sock *sk, u16 index, unsigned char *data, u16 len) { struct mgmt_mode *cp; struct hci_dev *hdev; struct pending_cmd *cmd; - u16 dev_id; int err, up; cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - BT_DBG("request for hci%u", dev_id); + BT_DBG("request for hci%u", index); - hdev = hci_dev_get(dev_id); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_SET_POWERED, ENODEV); + return cmd_status(sk, index, MGMT_OP_SET_POWERED, ENODEV); 
hci_dev_lock_bh(hdev); up = test_bit(HCI_UP, &hdev->flags); if ((cp->val && up) || (!cp->val && !up)) { - err = cmd_status(sk, MGMT_OP_SET_POWERED, EALREADY); + err = cmd_status(sk, index, MGMT_OP_SET_POWERED, EALREADY); goto failed; } - if (mgmt_pending_find(MGMT_OP_SET_POWERED, dev_id)) { - err = cmd_status(sk, MGMT_OP_SET_POWERED, EBUSY); + if (mgmt_pending_find(MGMT_OP_SET_POWERED, index)) { + err = cmd_status(sk, index, MGMT_OP_SET_POWERED, EBUSY); goto failed; } - cmd = mgmt_pending_add(sk, MGMT_OP_SET_POWERED, dev_id, data, len); + cmd = mgmt_pending_add(sk, MGMT_OP_SET_POWERED, index, data, len); if (!cmd) { err = -ENOMEM; goto failed; @@ -345,44 +338,43 @@ failed: return err; } -static int set_discoverable(struct sock *sk, unsigned char *data, u16 len) +static int set_discoverable(struct sock *sk, u16 index, unsigned char *data, + u16 len) { struct mgmt_mode *cp; struct hci_dev *hdev; struct pending_cmd *cmd; - u16 dev_id; u8 scan; int err; cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - BT_DBG("request for hci%u", dev_id); + BT_DBG("request for hci%u", index); - hdev = hci_dev_get(dev_id); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_SET_DISCOVERABLE, ENODEV); + return cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, ENODEV); hci_dev_lock_bh(hdev); if (!test_bit(HCI_UP, &hdev->flags)) { - err = cmd_status(sk, MGMT_OP_SET_DISCOVERABLE, ENETDOWN); + err = cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, ENETDOWN); goto failed; } - if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, dev_id) || - mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, dev_id)) { - err = cmd_status(sk, MGMT_OP_SET_DISCOVERABLE, EBUSY); + if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, index) || + mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, index)) { + err = cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, EBUSY); goto failed; } if (cp->val == test_bit(HCI_ISCAN, &hdev->flags) && test_bit(HCI_PSCAN, &hdev->flags)) { - err = cmd_status(sk, 
MGMT_OP_SET_DISCOVERABLE, EALREADY); + err = cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, EALREADY); goto failed; } - cmd = mgmt_pending_add(sk, MGMT_OP_SET_DISCOVERABLE, dev_id, data, len); + cmd = mgmt_pending_add(sk, MGMT_OP_SET_DISCOVERABLE, index, data, len); if (!cmd) { err = -ENOMEM; goto failed; @@ -404,43 +396,42 @@ failed: return err; } -static int set_connectable(struct sock *sk, unsigned char *data, u16 len) +static int set_connectable(struct sock *sk, u16 index, unsigned char *data, + u16 len) { struct mgmt_mode *cp; struct hci_dev *hdev; struct pending_cmd *cmd; - u16 dev_id; u8 scan; int err; cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - BT_DBG("request for hci%u", dev_id); + BT_DBG("request for hci%u", index); - hdev = hci_dev_get(dev_id); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_SET_CONNECTABLE, ENODEV); + return cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, ENODEV); hci_dev_lock_bh(hdev); if (!test_bit(HCI_UP, &hdev->flags)) { - err = cmd_status(sk, MGMT_OP_SET_CONNECTABLE, ENETDOWN); + err = cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, ENETDOWN); goto failed; } - if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, dev_id) || - mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, dev_id)) { - err = cmd_status(sk, MGMT_OP_SET_CONNECTABLE, EBUSY); + if (mgmt_pending_find(MGMT_OP_SET_DISCOVERABLE, index) || + mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, index)) { + err = cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, EBUSY); goto failed; } if (cp->val == test_bit(HCI_PSCAN, &hdev->flags)) { - err = cmd_status(sk, MGMT_OP_SET_CONNECTABLE, EALREADY); + err = cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, EALREADY); goto failed; } - cmd = mgmt_pending_add(sk, MGMT_OP_SET_CONNECTABLE, dev_id, data, len); + cmd = mgmt_pending_add(sk, MGMT_OP_SET_CONNECTABLE, index, data, len); if (!cmd) { err = -ENOMEM; goto failed; @@ -462,7 +453,8 @@ failed: return err; } -static int mgmt_event(u16 event, void *data, u16 data_len, 
struct sock *skip_sk) +static int mgmt_event(u16 event, u16 index, void *data, u16 data_len, + struct sock *skip_sk) { struct sk_buff *skb; struct mgmt_hdr *hdr; @@ -475,9 +467,11 @@ static int mgmt_event(u16 event, void *data, u16 data_len, struct sock *skip_sk) hdr = (void *) skb_put(skb, sizeof(*hdr)); hdr->opcode = cpu_to_le16(event); + hdr->index = cpu_to_le16(index); hdr->len = cpu_to_le16(data_len); - memcpy(skb_put(skb, data_len), data, data_len); + if (data) + memcpy(skb_put(skb, data_len), data, data_len); hci_send_to_sock(NULL, skb, skip_sk); kfree_skb(skb); @@ -489,27 +483,25 @@ static int send_mode_rsp(struct sock *sk, u16 opcode, u16 index, u8 val) { struct mgmt_mode rp; - put_unaligned_le16(index, &rp.index); rp.val = val; - return cmd_complete(sk, opcode, &rp, sizeof(rp)); + return cmd_complete(sk, index, opcode, &rp, sizeof(rp)); } -static int set_pairable(struct sock *sk, unsigned char *data, u16 len) +static int set_pairable(struct sock *sk, u16 index, unsigned char *data, + u16 len) { struct mgmt_mode *cp, ev; struct hci_dev *hdev; - u16 dev_id; int err; cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - BT_DBG("request for hci%u", dev_id); + BT_DBG("request for hci%u", index); - hdev = hci_dev_get(dev_id); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_SET_PAIRABLE, ENODEV); + return cmd_status(sk, index, MGMT_OP_SET_PAIRABLE, ENODEV); hci_dev_lock_bh(hdev); @@ -518,14 +510,13 @@ static int set_pairable(struct sock *sk, unsigned char *data, u16 len) else clear_bit(HCI_PAIRABLE, &hdev->flags); - err = send_mode_rsp(sk, MGMT_OP_SET_PAIRABLE, dev_id, cp->val); + err = send_mode_rsp(sk, MGMT_OP_SET_PAIRABLE, index, cp->val); if (err < 0) goto failed; - put_unaligned_le16(dev_id, &ev.index); ev.val = cp->val; - err = mgmt_event(MGMT_EV_PAIRABLE, &ev, sizeof(ev), sk); + err = mgmt_event(MGMT_EV_PAIRABLE, index, &ev, sizeof(ev), sk); failed: hci_dev_unlock_bh(hdev); @@ -567,22 +558,20 @@ static int 
update_class(struct hci_dev *hdev) return hci_send_cmd(hdev, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); } -static int add_uuid(struct sock *sk, unsigned char *data, u16 len) +static int add_uuid(struct sock *sk, u16 index, unsigned char *data, u16 len) { struct mgmt_cp_add_uuid *cp; struct hci_dev *hdev; struct bt_uuid *uuid; - u16 dev_id; int err; cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - BT_DBG("request for hci%u", dev_id); + BT_DBG("request for hci%u", index); - hdev = hci_dev_get(dev_id); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_ADD_UUID, ENODEV); + return cmd_status(sk, index, MGMT_OP_ADD_UUID, ENODEV); hci_dev_lock_bh(hdev); @@ -601,7 +590,7 @@ static int add_uuid(struct sock *sk, unsigned char *data, u16 len) if (err < 0) goto failed; - err = cmd_complete(sk, MGMT_OP_ADD_UUID, &dev_id, sizeof(dev_id)); + err = cmd_complete(sk, index, MGMT_OP_ADD_UUID, NULL, 0); failed: hci_dev_unlock_bh(hdev); @@ -610,23 +599,21 @@ failed: return err; } -static int remove_uuid(struct sock *sk, unsigned char *data, u16 len) +static int remove_uuid(struct sock *sk, u16 index, unsigned char *data, u16 len) { struct list_head *p, *n; struct mgmt_cp_remove_uuid *cp; struct hci_dev *hdev; u8 bt_uuid_any[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - u16 dev_id; int err, found; cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - BT_DBG("request for hci%u", dev_id); + BT_DBG("request for hci%u", index); - hdev = hci_dev_get(dev_id); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_REMOVE_UUID, ENODEV); + return cmd_status(sk, index, MGMT_OP_REMOVE_UUID, ENODEV); hci_dev_lock_bh(hdev); @@ -648,7 +635,7 @@ static int remove_uuid(struct sock *sk, unsigned char *data, u16 len) } if (found == 0) { - err = cmd_status(sk, MGMT_OP_REMOVE_UUID, ENOENT); + err = cmd_status(sk, index, MGMT_OP_REMOVE_UUID, ENOENT); goto unlock; } @@ -656,7 +643,7 @@ static int remove_uuid(struct sock *sk, 
unsigned char *data, u16 len) if (err < 0) goto unlock; - err = cmd_complete(sk, MGMT_OP_REMOVE_UUID, &dev_id, sizeof(dev_id)); + err = cmd_complete(sk, index, MGMT_OP_REMOVE_UUID, NULL, 0); unlock: hci_dev_unlock_bh(hdev); @@ -665,21 +652,20 @@ unlock: return err; } -static int set_dev_class(struct sock *sk, unsigned char *data, u16 len) +static int set_dev_class(struct sock *sk, u16 index, unsigned char *data, + u16 len) { struct hci_dev *hdev; struct mgmt_cp_set_dev_class *cp; - u16 dev_id; int err; cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - BT_DBG("request for hci%u", dev_id); + BT_DBG("request for hci%u", index); - hdev = hci_dev_get(dev_id); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_SET_DEV_CLASS, ENODEV); + return cmd_status(sk, index, MGMT_OP_SET_DEV_CLASS, ENODEV); hci_dev_lock_bh(hdev); @@ -689,8 +675,7 @@ static int set_dev_class(struct sock *sk, unsigned char *data, u16 len) err = update_class(hdev); if (err == 0) - err = cmd_complete(sk, MGMT_OP_SET_DEV_CLASS, &dev_id, - sizeof(dev_id)); + err = cmd_complete(sk, index, MGMT_OP_SET_DEV_CLASS, NULL, 0); hci_dev_unlock_bh(hdev); hci_dev_put(hdev); @@ -698,23 +683,22 @@ static int set_dev_class(struct sock *sk, unsigned char *data, u16 len) return err; } -static int set_service_cache(struct sock *sk, unsigned char *data, u16 len) +static int set_service_cache(struct sock *sk, u16 index, unsigned char *data, + u16 len) { struct hci_dev *hdev; struct mgmt_cp_set_service_cache *cp; - u16 dev_id; int err; cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - hdev = hci_dev_get(dev_id); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_SET_SERVICE_CACHE, ENODEV); + return cmd_status(sk, index, MGMT_OP_SET_SERVICE_CACHE, ENODEV); hci_dev_lock_bh(hdev); - BT_DBG("hci%u enable %d", dev_id, cp->enable); + BT_DBG("hci%u enable %d", index, cp->enable); if (cp->enable) { set_bit(HCI_SERVICE_CACHE, &hdev->flags); @@ -725,8 +709,8 @@ 
static int set_service_cache(struct sock *sk, unsigned char *data, u16 len) } if (err == 0) - err = cmd_complete(sk, MGMT_OP_SET_SERVICE_CACHE, &dev_id, - sizeof(dev_id)); + err = cmd_complete(sk, index, MGMT_OP_SET_SERVICE_CACHE, NULL, + 0); hci_dev_unlock_bh(hdev); hci_dev_put(hdev); @@ -734,15 +718,14 @@ static int set_service_cache(struct sock *sk, unsigned char *data, u16 len) return err; } -static int load_keys(struct sock *sk, unsigned char *data, u16 len) +static int load_keys(struct sock *sk, u16 index, unsigned char *data, u16 len) { struct hci_dev *hdev; struct mgmt_cp_load_keys *cp; - u16 dev_id, key_count, expected_len; + u16 key_count, expected_len; int i; cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); key_count = get_unaligned_le16(&cp->key_count); expected_len = sizeof(*cp) + key_count * sizeof(struct mgmt_key_info); @@ -752,11 +735,11 @@ static int load_keys(struct sock *sk, unsigned char *data, u16 len) return -EINVAL; } - hdev = hci_dev_get(dev_id); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_LOAD_KEYS, ENODEV); + return cmd_status(sk, index, MGMT_OP_LOAD_KEYS, ENODEV); - BT_DBG("hci%u debug_keys %u key_count %u", dev_id, cp->debug_keys, + BT_DBG("hci%u debug_keys %u key_count %u", index, cp->debug_keys, key_count); hci_dev_lock_bh(hdev); @@ -783,26 +766,24 @@ static int load_keys(struct sock *sk, unsigned char *data, u16 len) return 0; } -static int remove_key(struct sock *sk, unsigned char *data, u16 len) +static int remove_key(struct sock *sk, u16 index, unsigned char *data, u16 len) { struct hci_dev *hdev; struct mgmt_cp_remove_key *cp; struct hci_conn *conn; - u16 dev_id; int err; cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - hdev = hci_dev_get(dev_id); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_REMOVE_KEY, ENODEV); + return cmd_status(sk, index, MGMT_OP_REMOVE_KEY, ENODEV); hci_dev_lock_bh(hdev); err = hci_remove_link_key(hdev, &cp->bdaddr); if (err 
< 0) { - err = cmd_status(sk, MGMT_OP_REMOVE_KEY, -err); + err = cmd_status(sk, index, MGMT_OP_REMOVE_KEY, -err); goto unlock; } @@ -827,44 +808,42 @@ unlock: return err; } -static int disconnect(struct sock *sk, unsigned char *data, u16 len) +static int disconnect(struct sock *sk, u16 index, unsigned char *data, u16 len) { struct hci_dev *hdev; struct mgmt_cp_disconnect *cp; struct hci_cp_disconnect dc; struct pending_cmd *cmd; struct hci_conn *conn; - u16 dev_id; int err; BT_DBG(""); cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - hdev = hci_dev_get(dev_id); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_DISCONNECT, ENODEV); + return cmd_status(sk, index, MGMT_OP_DISCONNECT, ENODEV); hci_dev_lock_bh(hdev); if (!test_bit(HCI_UP, &hdev->flags)) { - err = cmd_status(sk, MGMT_OP_DISCONNECT, ENETDOWN); + err = cmd_status(sk, index, MGMT_OP_DISCONNECT, ENETDOWN); goto failed; } - if (mgmt_pending_find(MGMT_OP_DISCONNECT, dev_id)) { - err = cmd_status(sk, MGMT_OP_DISCONNECT, EBUSY); + if (mgmt_pending_find(MGMT_OP_DISCONNECT, index)) { + err = cmd_status(sk, index, MGMT_OP_DISCONNECT, EBUSY); goto failed; } conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->bdaddr); if (!conn) { - err = cmd_status(sk, MGMT_OP_DISCONNECT, ENOTCONN); + err = cmd_status(sk, index, MGMT_OP_DISCONNECT, ENOTCONN); goto failed; } - cmd = mgmt_pending_add(sk, MGMT_OP_DISCONNECT, dev_id, data, len); + cmd = mgmt_pending_add(sk, MGMT_OP_DISCONNECT, index, data, len); if (!cmd) { err = -ENOMEM; goto failed; @@ -884,24 +863,24 @@ failed: return err; } -static int get_connections(struct sock *sk, unsigned char *data, u16 len) +static int get_connections(struct sock *sk, u16 index, unsigned char *data, + u16 len) { struct mgmt_cp_get_connections *cp; struct mgmt_rp_get_connections *rp; struct hci_dev *hdev; struct list_head *p; size_t rp_len; - u16 dev_id, count; + u16 count; int i, err; BT_DBG(""); cp = (void *) data; - dev_id = 
get_unaligned_le16(&cp->index); - hdev = hci_dev_get(dev_id); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_GET_CONNECTIONS, ENODEV); + return cmd_status(sk, index, MGMT_OP_GET_CONNECTIONS, ENODEV); hci_dev_lock_bh(hdev); @@ -917,7 +896,6 @@ static int get_connections(struct sock *sk, unsigned char *data, u16 len) goto unlock; } - put_unaligned_le16(dev_id, &rp->index); put_unaligned_le16(count, &rp->conn_count); read_lock(&hci_dev_list_lock); @@ -931,7 +909,7 @@ static int get_connections(struct sock *sk, unsigned char *data, u16 len) read_unlock(&hci_dev_list_lock); - err = cmd_complete(sk, MGMT_OP_GET_CONNECTIONS, rp, rp_len); + err = cmd_complete(sk, index, MGMT_OP_GET_CONNECTIONS, rp, rp_len); unlock: kfree(rp); @@ -940,32 +918,31 @@ unlock: return err; } -static int pin_code_reply(struct sock *sk, unsigned char *data, u16 len) +static int pin_code_reply(struct sock *sk, u16 index, unsigned char *data, + u16 len) { struct hci_dev *hdev; struct mgmt_cp_pin_code_reply *cp; struct hci_cp_pin_code_reply reply; struct pending_cmd *cmd; - u16 dev_id; int err; BT_DBG(""); cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - hdev = hci_dev_get(dev_id); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_PIN_CODE_REPLY, ENODEV); + return cmd_status(sk, index, MGMT_OP_PIN_CODE_REPLY, ENODEV); hci_dev_lock_bh(hdev); if (!test_bit(HCI_UP, &hdev->flags)) { - err = cmd_status(sk, MGMT_OP_PIN_CODE_REPLY, ENETDOWN); + err = cmd_status(sk, index, MGMT_OP_PIN_CODE_REPLY, ENETDOWN); goto failed; } - cmd = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_REPLY, dev_id, data, len); + cmd = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_REPLY, index, data, len); if (!cmd) { err = -ENOMEM; goto failed; @@ -986,31 +963,32 @@ failed: return err; } -static int pin_code_neg_reply(struct sock *sk, unsigned char *data, u16 len) +static int pin_code_neg_reply(struct sock *sk, u16 index, unsigned char *data, + u16 len) { struct hci_dev *hdev; struct 
mgmt_cp_pin_code_neg_reply *cp; struct pending_cmd *cmd; - u16 dev_id; int err; BT_DBG(""); cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - hdev = hci_dev_get(dev_id); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_PIN_CODE_NEG_REPLY, ENODEV); + return cmd_status(sk, index, MGMT_OP_PIN_CODE_NEG_REPLY, + ENODEV); hci_dev_lock_bh(hdev); if (!test_bit(HCI_UP, &hdev->flags)) { - err = cmd_status(sk, MGMT_OP_PIN_CODE_NEG_REPLY, ENETDOWN); + err = cmd_status(sk, index, MGMT_OP_PIN_CODE_NEG_REPLY, + ENETDOWN); goto failed; } - cmd = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_NEG_REPLY, dev_id, + cmd = mgmt_pending_add(sk, MGMT_OP_PIN_CODE_NEG_REPLY, index, data, len); if (!cmd) { err = -ENOMEM; @@ -1029,20 +1007,19 @@ failed: return err; } -static int set_io_capability(struct sock *sk, unsigned char *data, u16 len) +static int set_io_capability(struct sock *sk, u16 index, unsigned char *data, + u16 len) { struct hci_dev *hdev; struct mgmt_cp_set_io_capability *cp; - u16 dev_id; BT_DBG(""); cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - hdev = hci_dev_get(dev_id); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_SET_IO_CAPABILITY, ENODEV); + return cmd_status(sk, index, MGMT_OP_SET_IO_CAPABILITY, ENODEV); hci_dev_lock_bh(hdev); @@ -1054,8 +1031,7 @@ static int set_io_capability(struct sock *sk, unsigned char *data, u16 len) hci_dev_unlock_bh(hdev); hci_dev_put(hdev); - return cmd_complete(sk, MGMT_OP_SET_IO_CAPABILITY, - &dev_id, sizeof(dev_id)); + return cmd_complete(sk, index, MGMT_OP_SET_IO_CAPABILITY, NULL, 0); } static inline struct pending_cmd *find_pairing(struct hci_conn *conn) @@ -1088,11 +1064,10 @@ static void pairing_complete(struct pending_cmd *cmd, u8 status) struct mgmt_rp_pair_device rp; struct hci_conn *conn = cmd->user_data; - rp.index = cmd->index; bacpy(&rp.bdaddr, &conn->dst); rp.status = status; - cmd_complete(cmd->sk, MGMT_OP_PAIR_DEVICE, &rp, sizeof(rp)); + 
cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE, &rp, sizeof(rp)); /* So we don't get further callbacks for this connection */ conn->connect_cfm_cb = NULL; @@ -1119,24 +1094,22 @@ static void pairing_complete_cb(struct hci_conn *conn, u8 status) pairing_complete(cmd, status); } -static int pair_device(struct sock *sk, unsigned char *data, u16 len) +static int pair_device(struct sock *sk, u16 index, unsigned char *data, u16 len) { struct hci_dev *hdev; struct mgmt_cp_pair_device *cp; struct pending_cmd *cmd; u8 sec_level, auth_type; struct hci_conn *conn; - u16 dev_id; int err; BT_DBG(""); cp = (void *) data; - dev_id = get_unaligned_le16(&cp->index); - hdev = hci_dev_get(dev_id); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, MGMT_OP_PAIR_DEVICE, ENODEV); + return cmd_status(sk, index, MGMT_OP_PAIR_DEVICE, ENODEV); hci_dev_lock_bh(hdev); @@ -1156,11 +1129,11 @@ static int pair_device(struct sock *sk, unsigned char *data, u16 len) if (conn->connect_cfm_cb) { hci_conn_put(conn); - err = cmd_status(sk, MGMT_OP_PAIR_DEVICE, EBUSY); + err = cmd_status(sk, index, MGMT_OP_PAIR_DEVICE, EBUSY); goto unlock; } - cmd = mgmt_pending_add(sk, MGMT_OP_PAIR_DEVICE, dev_id, data, len); + cmd = mgmt_pending_add(sk, MGMT_OP_PAIR_DEVICE, index, data, len); if (!cmd) { err = -ENOMEM; hci_conn_put(conn); @@ -1186,19 +1159,17 @@ unlock: return err; } -static int user_confirm_reply(struct sock *sk, unsigned char *data, u16 len, - int success) +static int user_confirm_reply(struct sock *sk, u16 index, unsigned char *data, + u16 len, int success) { struct mgmt_cp_user_confirm_reply *cp = (void *) data; - u16 dev_id, mgmt_op, hci_op; + u16 mgmt_op, hci_op; struct pending_cmd *cmd; struct hci_dev *hdev; int err; BT_DBG(""); - dev_id = get_unaligned_le16(&cp->index); - if (success) { mgmt_op = MGMT_OP_USER_CONFIRM_REPLY; hci_op = HCI_OP_USER_CONFIRM_REPLY; @@ -1207,16 +1178,16 @@ static int user_confirm_reply(struct sock *sk, unsigned char *data, u16 len, hci_op = 
HCI_OP_USER_CONFIRM_NEG_REPLY; } - hdev = hci_dev_get(dev_id); + hdev = hci_dev_get(index); if (!hdev) - return cmd_status(sk, mgmt_op, ENODEV); + return cmd_status(sk, index, mgmt_op, ENODEV); if (!test_bit(HCI_UP, &hdev->flags)) { - err = cmd_status(sk, mgmt_op, ENETDOWN); + err = cmd_status(sk, index, mgmt_op, ENETDOWN); goto failed; } - cmd = mgmt_pending_add(sk, mgmt_op, dev_id, data, len); + cmd = mgmt_pending_add(sk, mgmt_op, index, data, len); if (!cmd) { err = -ENOMEM; goto failed; @@ -1237,7 +1208,7 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) { unsigned char *buf; struct mgmt_hdr *hdr; - u16 opcode, len; + u16 opcode, index, len; int err; BT_DBG("got %zu bytes", msglen); @@ -1256,6 +1227,7 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) hdr = (struct mgmt_hdr *) buf; opcode = get_unaligned_le16(&hdr->opcode); + index = get_unaligned_le16(&hdr->index); len = get_unaligned_le16(&hdr->len); if (len != msglen - sizeof(*hdr)) { @@ -1271,65 +1243,65 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) err = read_index_list(sk); break; case MGMT_OP_READ_INFO: - err = read_controller_info(sk, buf + sizeof(*hdr), len); + err = read_controller_info(sk, index); break; case MGMT_OP_SET_POWERED: - err = set_powered(sk, buf + sizeof(*hdr), len); + err = set_powered(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_SET_DISCOVERABLE: - err = set_discoverable(sk, buf + sizeof(*hdr), len); + err = set_discoverable(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_SET_CONNECTABLE: - err = set_connectable(sk, buf + sizeof(*hdr), len); + err = set_connectable(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_SET_PAIRABLE: - err = set_pairable(sk, buf + sizeof(*hdr), len); + err = set_pairable(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_ADD_UUID: - err = add_uuid(sk, buf + sizeof(*hdr), len); + err = add_uuid(sk, index, buf + sizeof(*hdr), len); break; case 
MGMT_OP_REMOVE_UUID: - err = remove_uuid(sk, buf + sizeof(*hdr), len); + err = remove_uuid(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_SET_DEV_CLASS: - err = set_dev_class(sk, buf + sizeof(*hdr), len); + err = set_dev_class(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_SET_SERVICE_CACHE: - err = set_service_cache(sk, buf + sizeof(*hdr), len); + err = set_service_cache(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_LOAD_KEYS: - err = load_keys(sk, buf + sizeof(*hdr), len); + err = load_keys(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_REMOVE_KEY: - err = remove_key(sk, buf + sizeof(*hdr), len); + err = remove_key(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_DISCONNECT: - err = disconnect(sk, buf + sizeof(*hdr), len); + err = disconnect(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_GET_CONNECTIONS: - err = get_connections(sk, buf + sizeof(*hdr), len); + err = get_connections(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_PIN_CODE_REPLY: - err = pin_code_reply(sk, buf + sizeof(*hdr), len); + err = pin_code_reply(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_PIN_CODE_NEG_REPLY: - err = pin_code_neg_reply(sk, buf + sizeof(*hdr), len); + err = pin_code_neg_reply(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_SET_IO_CAPABILITY: - err = set_io_capability(sk, buf + sizeof(*hdr), len); + err = set_io_capability(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_PAIR_DEVICE: - err = pair_device(sk, buf + sizeof(*hdr), len); + err = pair_device(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_USER_CONFIRM_REPLY: - err = user_confirm_reply(sk, buf + sizeof(*hdr), len, 1); + err = user_confirm_reply(sk, index, buf + sizeof(*hdr), len, 1); break; case MGMT_OP_USER_CONFIRM_NEG_REPLY: - err = user_confirm_reply(sk, buf + sizeof(*hdr), len, 0); + err = user_confirm_reply(sk, index, buf + sizeof(*hdr), len, 0); break; default: BT_DBG("Unknown op %u", opcode); - err = 
cmd_status(sk, opcode, 0x01); + err = cmd_status(sk, index, opcode, 0x01); break; } @@ -1345,20 +1317,12 @@ done: int mgmt_index_added(u16 index) { - struct mgmt_ev_index_added ev; - - put_unaligned_le16(index, &ev.index); - - return mgmt_event(MGMT_EV_INDEX_ADDED, &ev, sizeof(ev), NULL); + return mgmt_event(MGMT_EV_INDEX_ADDED, index, NULL, 0, NULL); } int mgmt_index_removed(u16 index) { - struct mgmt_ev_index_added ev; - - put_unaligned_le16(index, &ev.index); - - return mgmt_event(MGMT_EV_INDEX_REMOVED, &ev, sizeof(ev), NULL); + return mgmt_event(MGMT_EV_INDEX_REMOVED, index, NULL, 0, NULL); } struct cmd_lookup { @@ -1394,10 +1358,9 @@ int mgmt_powered(u16 index, u8 powered) mgmt_pending_foreach(MGMT_OP_SET_POWERED, index, mode_rsp, &match); - put_unaligned_le16(index, &ev.index); ev.val = powered; - ret = mgmt_event(MGMT_EV_POWERED, &ev, sizeof(ev), match.sk); + ret = mgmt_event(MGMT_EV_POWERED, index, &ev, sizeof(ev), match.sk); if (match.sk) sock_put(match.sk); @@ -1414,10 +1377,10 @@ int mgmt_discoverable(u16 index, u8 discoverable) mgmt_pending_foreach(MGMT_OP_SET_DISCOVERABLE, index, mode_rsp, &match); - put_unaligned_le16(index, &ev.index); ev.val = discoverable; - ret = mgmt_event(MGMT_EV_DISCOVERABLE, &ev, sizeof(ev), match.sk); + ret = mgmt_event(MGMT_EV_DISCOVERABLE, index, &ev, sizeof(ev), + match.sk); if (match.sk) sock_put(match.sk); @@ -1433,10 +1396,9 @@ int mgmt_connectable(u16 index, u8 connectable) mgmt_pending_foreach(MGMT_OP_SET_CONNECTABLE, index, mode_rsp, &match); - put_unaligned_le16(index, &ev.index); ev.val = connectable; - ret = mgmt_event(MGMT_EV_CONNECTABLE, &ev, sizeof(ev), match.sk); + ret = mgmt_event(MGMT_EV_CONNECTABLE, index, &ev, sizeof(ev), match.sk); if (match.sk) sock_put(match.sk); @@ -1450,25 +1412,22 @@ int mgmt_new_key(u16 index, struct link_key *key, u8 old_key_type) memset(&ev, 0, sizeof(ev)); - put_unaligned_le16(index, &ev.index); - bacpy(&ev.key.bdaddr, &key->bdaddr); ev.key.type = key->type; memcpy(ev.key.val, 
key->val, 16); ev.key.pin_len = key->pin_len; ev.old_key_type = old_key_type; - return mgmt_event(MGMT_EV_NEW_KEY, &ev, sizeof(ev), NULL); + return mgmt_event(MGMT_EV_NEW_KEY, index, &ev, sizeof(ev), NULL); } int mgmt_connected(u16 index, bdaddr_t *bdaddr) { struct mgmt_ev_connected ev; - put_unaligned_le16(index, &ev.index); bacpy(&ev.bdaddr, bdaddr); - return mgmt_event(MGMT_EV_CONNECTED, &ev, sizeof(ev), NULL); + return mgmt_event(MGMT_EV_CONNECTED, index, &ev, sizeof(ev), NULL); } static void disconnect_rsp(struct pending_cmd *cmd, void *data) @@ -1477,10 +1436,9 @@ static void disconnect_rsp(struct pending_cmd *cmd, void *data) struct sock **sk = data; struct mgmt_rp_disconnect rp; - put_unaligned_le16(cmd->index, &rp.index); bacpy(&rp.bdaddr, &cp->bdaddr); - cmd_complete(cmd->sk, MGMT_OP_DISCONNECT, &rp, sizeof(rp)); + cmd_complete(cmd->sk, cmd->index, MGMT_OP_DISCONNECT, &rp, sizeof(rp)); *sk = cmd->sk; sock_hold(*sk); @@ -1496,10 +1454,9 @@ int mgmt_disconnected(u16 index, bdaddr_t *bdaddr) mgmt_pending_foreach(MGMT_OP_DISCONNECT, index, disconnect_rsp, &sk); - put_unaligned_le16(index, &ev.index); bacpy(&ev.bdaddr, bdaddr); - err = mgmt_event(MGMT_EV_DISCONNECTED, &ev, sizeof(ev), sk); + err = mgmt_event(MGMT_EV_DISCONNECTED, index, &ev, sizeof(ev), sk); if (sk) sock_put(sk); @@ -1516,7 +1473,7 @@ int mgmt_disconnect_failed(u16 index) if (!cmd) return -ENOENT; - err = cmd_status(cmd->sk, MGMT_OP_DISCONNECT, EIO); + err = cmd_status(cmd->sk, index, MGMT_OP_DISCONNECT, EIO); mgmt_pending_remove(cmd); @@ -1527,21 +1484,20 @@ int mgmt_connect_failed(u16 index, bdaddr_t *bdaddr, u8 status) { struct mgmt_ev_connect_failed ev; - put_unaligned_le16(index, &ev.index); bacpy(&ev.bdaddr, bdaddr); ev.status = status; - return mgmt_event(MGMT_EV_CONNECT_FAILED, &ev, sizeof(ev), NULL); + return mgmt_event(MGMT_EV_CONNECT_FAILED, index, &ev, sizeof(ev), NULL); } int mgmt_pin_code_request(u16 index, bdaddr_t *bdaddr) { struct mgmt_ev_pin_code_request ev; - 
put_unaligned_le16(index, &ev.index); bacpy(&ev.bdaddr, bdaddr); - return mgmt_event(MGMT_EV_PIN_CODE_REQUEST, &ev, sizeof(ev), NULL); + return mgmt_event(MGMT_EV_PIN_CODE_REQUEST, index, &ev, sizeof(ev), + NULL); } int mgmt_pin_code_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) @@ -1554,11 +1510,11 @@ int mgmt_pin_code_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) if (!cmd) return -ENOENT; - put_unaligned_le16(index, &rp.index); bacpy(&rp.bdaddr, bdaddr); rp.status = status; - err = cmd_complete(cmd->sk, MGMT_OP_PIN_CODE_REPLY, &rp, sizeof(rp)); + err = cmd_complete(cmd->sk, index, MGMT_OP_PIN_CODE_REPLY, &rp, + sizeof(rp)); mgmt_pending_remove(cmd); @@ -1575,12 +1531,11 @@ int mgmt_pin_code_neg_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) if (!cmd) return -ENOENT; - put_unaligned_le16(index, &rp.index); bacpy(&rp.bdaddr, bdaddr); rp.status = status; - err = cmd_complete(cmd->sk, MGMT_OP_PIN_CODE_NEG_REPLY, - &rp, sizeof(rp)); + err = cmd_complete(cmd->sk, index, MGMT_OP_PIN_CODE_NEG_REPLY, &rp, + sizeof(rp)); mgmt_pending_remove(cmd); @@ -1593,11 +1548,11 @@ int mgmt_user_confirm_request(u16 index, bdaddr_t *bdaddr, __le32 value) BT_DBG("hci%u", index); - put_unaligned_le16(index, &ev.index); bacpy(&ev.bdaddr, bdaddr); put_unaligned_le32(value, &ev.value); - return mgmt_event(MGMT_EV_USER_CONFIRM_REQUEST, &ev, sizeof(ev), NULL); + return mgmt_event(MGMT_EV_USER_CONFIRM_REQUEST, index, &ev, sizeof(ev), + NULL); } static int confirm_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status, @@ -1611,10 +1566,9 @@ static int confirm_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status, if (!cmd) return -ENOENT; - put_unaligned_le16(index, &rp.index); bacpy(&rp.bdaddr, bdaddr); rp.status = status; - err = cmd_complete(cmd->sk, opcode, &rp, sizeof(rp)); + err = cmd_complete(cmd->sk, index, opcode, &rp, sizeof(rp)); mgmt_pending_remove(cmd); @@ -1638,9 +1592,8 @@ int mgmt_auth_failed(u16 index, bdaddr_t *bdaddr, u8 status) { struct 
mgmt_ev_auth_failed ev; - put_unaligned_le16(index, &ev.index); bacpy(&ev.bdaddr, bdaddr); ev.status = status; - return mgmt_event(MGMT_EV_AUTH_FAILED, &ev, sizeof(ev), NULL); + return mgmt_event(MGMT_EV_AUTH_FAILED, index, &ev, sizeof(ev), NULL); } -- cgit v1.1 From bdce7bafb786701004b2055e15d6ff4b3be678f3 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Fri, 25 Feb 2011 19:05:49 +0100 Subject: Bluetooth: Validate data size before accessing mgmt commands Crafted (too small) data buffer could result in reading data outside of buffer. Validate buffer size and return EINVAL if size is wrong. Signed-off-by: Szymon Janc Acked-by: Johan Hedberg Signed-off-by: Gustavo F. Padovan --- net/bluetooth/mgmt.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 98c92ae..16c7a4d 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -302,6 +302,9 @@ static int set_powered(struct sock *sk, u16 index, unsigned char *data, u16 len) BT_DBG("request for hci%u", index); + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_SET_POWERED, EINVAL); + hdev = hci_dev_get(index); if (!hdev) return cmd_status(sk, index, MGMT_OP_SET_POWERED, ENODEV); @@ -351,6 +354,9 @@ static int set_discoverable(struct sock *sk, u16 index, unsigned char *data, BT_DBG("request for hci%u", index); + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, EINVAL); + hdev = hci_dev_get(index); if (!hdev) return cmd_status(sk, index, MGMT_OP_SET_DISCOVERABLE, ENODEV); @@ -409,6 +415,9 @@ static int set_connectable(struct sock *sk, u16 index, unsigned char *data, BT_DBG("request for hci%u", index); + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, EINVAL); + hdev = hci_dev_get(index); if (!hdev) return cmd_status(sk, index, MGMT_OP_SET_CONNECTABLE, ENODEV); @@ -499,6 +508,9 @@ static int set_pairable(struct sock *sk, 
u16 index, unsigned char *data, BT_DBG("request for hci%u", index); + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_SET_PAIRABLE, EINVAL); + hdev = hci_dev_get(index); if (!hdev) return cmd_status(sk, index, MGMT_OP_SET_PAIRABLE, ENODEV); @@ -569,6 +581,9 @@ static int add_uuid(struct sock *sk, u16 index, unsigned char *data, u16 len) BT_DBG("request for hci%u", index); + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_ADD_UUID, EINVAL); + hdev = hci_dev_get(index); if (!hdev) return cmd_status(sk, index, MGMT_OP_ADD_UUID, ENODEV); @@ -611,6 +626,9 @@ static int remove_uuid(struct sock *sk, u16 index, unsigned char *data, u16 len) BT_DBG("request for hci%u", index); + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_REMOVE_UUID, EINVAL); + hdev = hci_dev_get(index); if (!hdev) return cmd_status(sk, index, MGMT_OP_REMOVE_UUID, ENODEV); @@ -663,6 +681,9 @@ static int set_dev_class(struct sock *sk, u16 index, unsigned char *data, BT_DBG("request for hci%u", index); + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_SET_DEV_CLASS, EINVAL); + hdev = hci_dev_get(index); if (!hdev) return cmd_status(sk, index, MGMT_OP_SET_DEV_CLASS, ENODEV); @@ -692,6 +713,10 @@ static int set_service_cache(struct sock *sk, u16 index, unsigned char *data, cp = (void *) data; + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_SET_SERVICE_CACHE, + EINVAL); + hdev = hci_dev_get(index); if (!hdev) return cmd_status(sk, index, MGMT_OP_SET_SERVICE_CACHE, ENODEV); @@ -726,6 +751,10 @@ static int load_keys(struct sock *sk, u16 index, unsigned char *data, u16 len) int i; cp = (void *) data; + + if (len < sizeof(*cp)) + return -EINVAL; + key_count = get_unaligned_le16(&cp->key_count); expected_len = sizeof(*cp) + key_count * sizeof(struct mgmt_key_info); @@ -775,6 +804,9 @@ static int remove_key(struct sock *sk, u16 index, unsigned char *data, u16 len) cp = (void *) data; + if (len != sizeof(*cp)) + return cmd_status(sk, 
index, MGMT_OP_REMOVE_KEY, EINVAL); + hdev = hci_dev_get(index); if (!hdev) return cmd_status(sk, index, MGMT_OP_REMOVE_KEY, ENODEV); @@ -821,6 +853,9 @@ static int disconnect(struct sock *sk, u16 index, unsigned char *data, u16 len) cp = (void *) data; + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_DISCONNECT, EINVAL); + hdev = hci_dev_get(index); if (!hdev) return cmd_status(sk, index, MGMT_OP_DISCONNECT, ENODEV); @@ -931,6 +966,9 @@ static int pin_code_reply(struct sock *sk, u16 index, unsigned char *data, cp = (void *) data; + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_PIN_CODE_REPLY, EINVAL); + hdev = hci_dev_get(index); if (!hdev) return cmd_status(sk, index, MGMT_OP_PIN_CODE_REPLY, ENODEV); @@ -975,6 +1013,10 @@ static int pin_code_neg_reply(struct sock *sk, u16 index, unsigned char *data, cp = (void *) data; + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_PIN_CODE_NEG_REPLY, + EINVAL); + hdev = hci_dev_get(index); if (!hdev) return cmd_status(sk, index, MGMT_OP_PIN_CODE_NEG_REPLY, @@ -1017,6 +1059,10 @@ static int set_io_capability(struct sock *sk, u16 index, unsigned char *data, cp = (void *) data; + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_SET_IO_CAPABILITY, + EINVAL); + hdev = hci_dev_get(index); if (!hdev) return cmd_status(sk, index, MGMT_OP_SET_IO_CAPABILITY, ENODEV); @@ -1107,6 +1153,9 @@ static int pair_device(struct sock *sk, u16 index, unsigned char *data, u16 len) cp = (void *) data; + if (len != sizeof(*cp)) + return cmd_status(sk, index, MGMT_OP_PAIR_DEVICE, EINVAL); + hdev = hci_dev_get(index); if (!hdev) return cmd_status(sk, index, MGMT_OP_PAIR_DEVICE, ENODEV); @@ -1178,6 +1227,9 @@ static int user_confirm_reply(struct sock *sk, u16 index, unsigned char *data, hci_op = HCI_OP_USER_CONFIRM_NEG_REPLY; } + if (len != sizeof(*cp)) + return cmd_status(sk, index, mgmt_op, EINVAL); + hdev = hci_dev_get(index); if (!hdev) return cmd_status(sk, index, mgmt_op, ENODEV); -- 
cgit v1.1 From 30e7627219f985cd17a1ac24e0163ebcfb1277bf Mon Sep 17 00:00:00 2001 From: Ville Tervo Date: Tue, 22 Feb 2011 16:10:53 -0300 Subject: Bluetooth: Use ERR_PTR as return error from hci_connect Use ERR_PTR mechanism to return error from hci_connect. Signed-off-by: Ville Tervo Signed-off-by: Anderson Briglia Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_conn.c | 4 ++-- net/bluetooth/l2cap_core.c | 11 ++++++----- net/bluetooth/mgmt.c | 4 ++-- net/bluetooth/sco.c | 7 ++++--- 4 files changed, 14 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 4504cb6..7a6f56b 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -431,10 +431,10 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 if (type == LE_LINK) { le = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst); if (le) - return NULL; + return ERR_PTR(-EBUSY); le = hci_conn_add(hdev, LE_LINK, dst); if (!le) - return NULL; + return ERR_PTR(-ENOMEM); if (le->state == BT_OPEN) hci_le_connect(le); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 1db6c90..c9f9cec 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -852,8 +852,6 @@ int l2cap_do_connect(struct sock *sk) hci_dev_lock_bh(hdev); - err = -ENOMEM; - auth_type = l2cap_get_auth_type(sk); if (l2cap_pi(sk)->dcid == L2CAP_CID_LE_DATA) @@ -863,17 +861,18 @@ int l2cap_do_connect(struct sock *sk) hcon = hci_connect(hdev, ACL_LINK, dst, l2cap_pi(sk)->sec_level, auth_type); - if (!hcon) + if (IS_ERR(hcon)) { + err = PTR_ERR(hcon); goto done; + } conn = l2cap_conn_add(hcon, 0); if (!conn) { hci_conn_put(hcon); + err = -ENOMEM; goto done; } - err = 0; - /* Update source addr of the socket */ bacpy(src, conn->src); @@ -892,6 +891,8 @@ int l2cap_do_connect(struct sock *sk) l2cap_do_start(sk); } + err = 0; + done: hci_dev_unlock_bh(hdev); hci_dev_put(hdev); diff --git a/net/bluetooth/mgmt.c 
b/net/bluetooth/mgmt.c index 16c7a4d..46c3edc 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1171,8 +1171,8 @@ static int pair_device(struct sock *sk, u16 index, unsigned char *data, u16 len) } conn = hci_connect(hdev, ACL_LINK, &cp->bdaddr, sec_level, auth_type); - if (!conn) { - err = -ENOMEM; + if (IS_ERR(conn)) { + err = PTR_ERR(conn); goto unlock; } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index c9348dd..42fdffd 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -190,20 +190,21 @@ static int sco_connect(struct sock *sk) hci_dev_lock_bh(hdev); - err = -ENOMEM; - if (lmp_esco_capable(hdev) && !disable_esco) type = ESCO_LINK; else type = SCO_LINK; hcon = hci_connect(hdev, type, dst, BT_SECURITY_LOW, HCI_AT_NO_BONDING); - if (!hcon) + if (IS_ERR(hcon)) { + err = PTR_ERR(hcon); goto done; + } conn = sco_conn_add(hcon, 0); if (!conn) { hci_conn_put(hcon); + err = -ENOMEM; goto done; } -- cgit v1.1 From 080e4130b1fb6a02e75149a1cccc8192e734713d Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Thu, 17 Feb 2011 23:43:33 +0000 Subject: netpoll: remove IFF_IN_NETPOLL flag V4: rebase to net-next-2.6 This patch removes the flag IFF_IN_NETPOLL, we don't need it any more since we have netpoll_tx_running() now. Signed-off-by: WANG Cong Acked-by: Neil Horman Cc: Herbert Xu Signed-off-by: David S. 
Miller --- net/core/netpoll.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 02dc2cb..f68e694 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -313,9 +313,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, tries > 0; --tries) { if (__netif_tx_trylock(txq)) { if (!netif_tx_queue_stopped(txq)) { - dev->priv_flags |= IFF_IN_NETPOLL; status = ops->ndo_start_xmit(skb, dev); - dev->priv_flags &= ~IFF_IN_NETPOLL; if (status == NETDEV_TX_OK) txq_trans_update(txq); } -- cgit v1.1 From 5a698af53fb85b92d6462939a2c75ec4c7233bb9 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Thu, 17 Feb 2011 23:43:34 +0000 Subject: bond: service netpoll arp queue on master device Neil pointed out that we can't send ARP reply on behalf of slaves, we need to move the arp queue to their bond device. Signed-off-by: WANG Cong Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/core/netpoll.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index f68e694..06be243 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -193,6 +193,17 @@ void netpoll_poll_dev(struct net_device *dev) poll_napi(dev); + if (dev->priv_flags & IFF_SLAVE) { + if (dev->npinfo) { + struct net_device *bond_dev = dev->master; + struct sk_buff *skb; + while ((skb = skb_dequeue(&dev->npinfo->arp_tx))) { + skb->dev = bond_dev; + skb_queue_tail(&bond_dev->npinfo->arp_tx, skb); + } + } + } + service_arp_queue(dev->npinfo); zap_completion_queue(); -- cgit v1.1 From 6f2f19ed955e62a6789495da512d510f26ad4885 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 27 Feb 2011 23:04:45 -0800 Subject: xfrm: Pass name as const to xfrm_*_get_byname(). Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_algo.c | 8 ++++---- net/xfrm/xfrm_user.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 8b4d6e3..58064d9 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -618,21 +618,21 @@ static int xfrm_alg_name_match(const struct xfrm_algo_desc *entry, (entry->compat && !strcmp(name, entry->compat))); } -struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name, int probe) +struct xfrm_algo_desc *xfrm_aalg_get_byname(const char *name, int probe) { return xfrm_find_algo(&xfrm_aalg_list, xfrm_alg_name_match, name, probe); } EXPORT_SYMBOL_GPL(xfrm_aalg_get_byname); -struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe) +struct xfrm_algo_desc *xfrm_ealg_get_byname(const char *name, int probe) { return xfrm_find_algo(&xfrm_ealg_list, xfrm_alg_name_match, name, probe); } EXPORT_SYMBOL_GPL(xfrm_ealg_get_byname); -struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe) +struct xfrm_algo_desc *xfrm_calg_get_byname(const char *name, int probe) { return xfrm_find_algo(&xfrm_calg_list, xfrm_alg_name_match, name, probe); @@ -654,7 +654,7 @@ static int xfrm_aead_name_match(const struct xfrm_algo_desc *entry, !strcmp(name, entry->name); } -struct xfrm_algo_desc *xfrm_aead_get_byname(char *name, int icv_len, int probe) +struct xfrm_algo_desc *xfrm_aead_get_byname(const char *name, int icv_len, int probe) { struct xfrm_aead_name data = { .name = name, diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b43c1b1..673698d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -234,7 +234,7 @@ out: } static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, - struct xfrm_algo_desc *(*get_byname)(char *, int), + struct xfrm_algo_desc *(*get_byname)(const char *, int), struct nlattr *rta) { struct xfrm_algo *p, *ualg; -- cgit v1.1 From a70486f0e669730bad6713063e3f59e2e870044f Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Sun, 27 Feb 2011 23:17:24 -0800 Subject: xfrm: Pass const xfrm_address_t objects to xfrm_state_lookup* and xfrm_find_acq. Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 8496b3d..81221d9 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -997,7 +997,11 @@ void xfrm_state_insert(struct xfrm_state *x) EXPORT_SYMBOL(xfrm_state_insert); /* xfrm_state_lock is held */ -static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m, unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) +static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m, + unsigned short family, u8 mode, + u32 reqid, u8 proto, + const xfrm_address_t *daddr, + const xfrm_address_t *saddr, int create) { unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family); struct hlist_node *entry; @@ -1375,7 +1379,7 @@ int xfrm_state_check_expire(struct xfrm_state *x) EXPORT_SYMBOL(xfrm_state_check_expire); struct xfrm_state * -xfrm_state_lookup(struct net *net, u32 mark, xfrm_address_t *daddr, __be32 spi, +xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { struct xfrm_state *x; @@ -1389,7 +1393,7 @@ EXPORT_SYMBOL(xfrm_state_lookup); struct xfrm_state * xfrm_state_lookup_byaddr(struct net *net, u32 mark, - xfrm_address_t *daddr, xfrm_address_t *saddr, + const xfrm_address_t *daddr, const xfrm_address_t *saddr, u8 proto, unsigned short family) { struct xfrm_state *x; @@ -1403,7 +1407,7 @@ EXPORT_SYMBOL(xfrm_state_lookup_byaddr); struct xfrm_state * xfrm_find_acq(struct net *net, struct xfrm_mark *mark, u8 mode, u32 reqid, u8 proto, - xfrm_address_t *daddr, xfrm_address_t *saddr, + const xfrm_address_t *daddr, const xfrm_address_t *saddr, int create, unsigned 
short family) { struct xfrm_state *x; -- cgit v1.1 From 8a80c79a776d1b1b54895314ffaf53d0c7604c80 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 28 Feb 2011 17:59:15 +0100 Subject: netfilter: nf_ct_tcp: fix out of sync scenario while in SYN_RECV This patch fixes the out of sync scenarios while in SYN_RECV state. Quoting Jozsef, what happens if we are out of sync is the following: > > b. conntrack entry is outdated, new SYN received > > - (b1) we ignore it but save the initialization data from it > > - (b2) when the reply SYN/ACK receives and it matches the saved data, > > we pick up the new connection This is what should happen if we are in SYN_RECV state. Initially, the SYN packet hits b1, thus we save data from it. But the SYN/ACK packet is considered a retransmission given that we're in SYN_RECV state. Therefore, we never hit b2 and we don't get in sync. To fix this, we ignore SYN/ACK if we are in SYN_RECV. If the previous packet was a SYN, then we enter the ignore case that gets us in sync. This patch helps conntrackd a lot in stress scenarios (assuming a client that generates lots of small TCP connections). During the failover, consider that the new primary has injected one outdated flow in SYN_RECV state (this is likely to happen if the conntrack event rate is high because the backup will be a bit delayed from the primary). With the current code, if the client starts a new fresh connection that matches the tuple, the SYN packet will be ignored without updating the state tracking, and the SYN+ACK in reply will be blocked as it will not pass checks III or IV (since all state tracking in the original direction is not initialized because the SYN packet was ignored and the ignore case that gets us in sync is not applied). I posted a couple of patches before this one. Changli Gao spotted a simpler way to fix this problem. This patch implements his idea.
Cc: Changli Gao Cc: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_proto_tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 6f38d0e..37bf943 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -227,11 +227,11 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sCL -> sIV */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ -/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sSR }, +/*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR }, /* * sSS -> sSR Standard open. * sS2 -> sSR Simultaneous open - * sSR -> sSR Retransmitted SYN/ACK. + * sSR -> sIG Retransmitted SYN/ACK, ignore it. * sES -> sIG Late retransmitted SYN/ACK? * sFW -> sIG Might be SYN/ACK answering ignored SYN * sCW -> sIG -- cgit v1.1 From 63d8ea7f93e1fb9d1aa9509ab3e1a71199245c80 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 28 Feb 2011 10:48:59 -0800 Subject: net: Forgot to commit net/core/dev.c part of Jiri's ->rx_handler patch. Signed-off-by: David S. 
Miller --- net/core/dev.c | 119 +++++++++++++++------------------------------------------ 1 file changed, 31 insertions(+), 88 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 69a3c08..30440e7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3096,63 +3096,31 @@ void netdev_rx_handler_unregister(struct net_device *dev) } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); -static inline void skb_bond_set_mac_by_master(struct sk_buff *skb, - struct net_device *master) +static void vlan_on_bond_hook(struct sk_buff *skb) { - if (skb->pkt_type == PACKET_HOST) { - u16 *dest = (u16 *) eth_hdr(skb)->h_dest; + /* + * Make sure ARP frames received on VLAN interfaces stacked on + * bonding interfaces still make their way to any base bonding + * device that may have registered for a specific ptype. + */ + if (skb->dev->priv_flags & IFF_802_1Q_VLAN && + vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING && + skb->protocol == htons(ETH_P_ARP)) { + struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); - memcpy(dest, master->dev_addr, ETH_ALEN); + if (!skb2) + return; + skb2->dev = vlan_dev_real_dev(skb->dev); + netif_rx(skb2); } } -/* On bonding slaves other than the currently active slave, suppress - * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and - * ARP on active-backup slaves with arp_validate enabled. - */ -static int __skb_bond_should_drop(struct sk_buff *skb, - struct net_device *master) -{ - struct net_device *dev = skb->dev; - - if (master->priv_flags & IFF_MASTER_ARPMON) - dev->last_rx = jiffies; - - if ((master->priv_flags & IFF_MASTER_ALB) && - (master->priv_flags & IFF_BRIDGE_PORT)) { - /* Do address unmangle. The local destination address - * will be always the one master has. Provides the right - * functionality in a bridge. 
- */ - skb_bond_set_mac_by_master(skb, master); - } - - if (dev->priv_flags & IFF_SLAVE_INACTIVE) { - if ((dev->priv_flags & IFF_SLAVE_NEEDARP) && - skb->protocol == __cpu_to_be16(ETH_P_ARP)) - return 0; - - if (master->priv_flags & IFF_MASTER_ALB) { - if (skb->pkt_type != PACKET_BROADCAST && - skb->pkt_type != PACKET_MULTICAST) - return 0; - } - if (master->priv_flags & IFF_MASTER_8023AD && - skb->protocol == __cpu_to_be16(ETH_P_SLOW)) - return 0; - - return 1; - } - return 0; -} - static int __netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; rx_handler_func_t *rx_handler; struct net_device *orig_dev; - struct net_device *null_or_orig; - struct net_device *orig_or_bond; + struct net_device *null_or_dev; int ret = NET_RX_DROP; __be16 type; @@ -3167,32 +3135,8 @@ static int __netif_receive_skb(struct sk_buff *skb) if (!skb->skb_iif) skb->skb_iif = skb->dev->ifindex; - - /* - * bonding note: skbs received on inactive slaves should only - * be delivered to pkt handlers that are exact matches. Also - * the deliver_no_wcard flag will be set. If packet handlers - * are sensitive to duplicate packets these skbs will need to - * be dropped at the handler. 
- */ - null_or_orig = NULL; orig_dev = skb->dev; - if (skb->deliver_no_wcard) - null_or_orig = orig_dev; - else if (netif_is_bond_slave(orig_dev)) { - struct net_device *bond_master = ACCESS_ONCE(orig_dev->master); - - if (likely(bond_master)) { - if (__skb_bond_should_drop(skb, bond_master)) { - skb->deliver_no_wcard = 1; - /* deliver only exact match */ - null_or_orig = orig_dev; - } else - skb->dev = bond_master; - } - } - __this_cpu_inc(softnet_data.processed); skb_reset_network_header(skb); skb_reset_transport_header(skb); skb->mac_len = skb->network_header - skb->mac_header; @@ -3201,6 +3145,10 @@ static int __netif_receive_skb(struct sk_buff *skb) rcu_read_lock(); +another_round: + + __this_cpu_inc(softnet_data.processed); + #ifdef CONFIG_NET_CLS_ACT if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); @@ -3209,8 +3157,7 @@ static int __netif_receive_skb(struct sk_buff *skb) #endif list_for_each_entry_rcu(ptype, &ptype_all, list) { - if (ptype->dev == null_or_orig || ptype->dev == skb->dev || - ptype->dev == orig_dev) { + if (!ptype->dev || ptype->dev == skb->dev) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; @@ -3224,16 +3171,20 @@ static int __netif_receive_skb(struct sk_buff *skb) ncls: #endif - /* Handle special case of bridge or macvlan */ rx_handler = rcu_dereference(skb->dev->rx_handler); if (rx_handler) { + struct net_device *prev_dev; + if (pt_prev) { ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; } + prev_dev = skb->dev; skb = rx_handler(skb); if (!skb) goto out; + if (skb->dev != prev_dev) + goto another_round; } if (vlan_tx_tag_present(skb)) { @@ -3248,24 +3199,16 @@ ncls: goto out; } - /* - * Make sure frames received on VLAN interfaces stacked on - * bonding interfaces still make their way to any base bonding - * device that may have registered for a specific ptype. The - * handler may have to adjust skb->dev and orig_dev. 
- */ - orig_or_bond = orig_dev; - if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) && - (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) { - orig_or_bond = vlan_dev_real_dev(skb->dev); - } + vlan_on_bond_hook(skb); + + /* deliver only exact match when indicated */ + null_or_dev = skb->deliver_no_wcard ? skb->dev : NULL; type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { - if (ptype->type == type && (ptype->dev == null_or_orig || - ptype->dev == skb->dev || ptype->dev == orig_dev || - ptype->dev == orig_or_bond)) { + if (ptype->type == type && + (ptype->dev == null_or_dev || ptype->dev == skb->dev)) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; -- cgit v1.1 From d45dcef77019012fc6769e657fc2f1a5d681bbbb Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Fri, 25 Feb 2011 22:41:25 -0300 Subject: Bluetooth: Fix BT_L2CAP and BT_SCO in Kconfig If we want something "bool" built-in in something "tristate" it can't "depend on" the tristate config option. Report by DaveM: I give it 'y' just to make it happen, for both, and afterwards no matter how many times I rerun "make oldconfig" I keep seeing things like this in my build: scripts/kconfig/conf --silentoldconfig Kconfig include/config/auto.conf:986:warning: symbol value 'm' invalid for BT_SCO include/config/auto.conf:3156:warning: symbol value 'm' invalid for BT_L2CAP Reported-by: David S. Miller Signed-off-by: Gustavo F. Padovan Signed-off-by: John W. Linville --- net/bluetooth/Kconfig | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index c6f9c2f..6ae5ec5 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -31,9 +31,10 @@ menuconfig BT to Bluetooth kernel modules are provided in the BlueZ packages. For more information, see <http://www.bluez.org/>.
+if BT != n + config BT_L2CAP bool "L2CAP protocol support" - depends on BT select CRC16 help L2CAP (Logical Link Control and Adaptation Protocol) provides @@ -42,11 +43,12 @@ config BT_L2CAP config BT_SCO bool "SCO links support" - depends on BT help SCO link provides voice transport over Bluetooth. SCO support is required for voice applications like Headset and Audio. +endif + source "net/bluetooth/rfcomm/Kconfig" source "net/bluetooth/bnep/Kconfig" -- cgit v1.1 From 696ea472e19c6d1fa843bb1abce73b9c3a414391 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Tue, 22 Feb 2011 01:55:18 +0000 Subject: llc: avoid skb_clone() if there is only one handler Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- net/llc/llc_input.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index f996874..058f1e9 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -181,25 +181,26 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, * LLC functionality */ rcv = rcu_dereference(sap->rcv_func); - if (rcv) { - struct sk_buff *cskb = skb_clone(skb, GFP_ATOMIC); - if (cskb) - rcv(cskb, dev, pt, orig_dev); - } dest = llc_pdu_type(skb); - if (unlikely(!dest || !llc_type_handlers[dest - 1])) - goto drop_put; - llc_type_handlers[dest - 1](sap, skb); -out_put: + if (unlikely(!dest || !llc_type_handlers[dest - 1])) { + if (rcv) + rcv(skb, dev, pt, orig_dev); + else + kfree_skb(skb); + } else { + if (rcv) { + struct sk_buff *cskb = skb_clone(skb, GFP_ATOMIC); + if (cskb) + rcv(cskb, dev, pt, orig_dev); + } + llc_type_handlers[dest - 1](sap, skb); + } llc_sap_put(sap); out: return 0; drop: kfree_skb(skb); goto out; -drop_put: - kfree_skb(skb); - goto out_put; handle_station: if (!llc_station_handler) goto drop; -- cgit v1.1 From a693e69897e7811e2790295f38a0ce3a92c4b45c Mon Sep 17 00:00:00 2001 From: Anders Berggren Date: Mon, 28 Feb 2011 12:32:11 -0800 Subject: net: TX 
timestamps for IPv6 UDP packets Enabling TX timestamps (SO_TIMESTAMPING) for IPv6 UDP packets, in the same fashion as for IPv4. Necessary in order for NICs such as Intel 82580 to timestamp IPv6 packets. Signed-off-by: Anders Berggren Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 25a2647..065b3f7 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1118,6 +1118,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int err; int offset = 0; int csummode = CHECKSUM_NONE; + __u8 tx_flags = 0; if (flags&MSG_PROBE) return 0; @@ -1202,6 +1203,13 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, } } + /* For UDP, check if TX timestamp is enabled */ + if (sk->sk_type == SOCK_DGRAM) { + err = sock_tx_timestamp(sk, &tx_flags); + if (err) + goto error; + } + /* * Let's try using as much space as possible. * Use MTU if total length of the message fits into the MTU. @@ -1306,6 +1314,12 @@ alloc_new_skb: sk->sk_allocation); if (unlikely(skb == NULL)) err = -ENOBUFS; + else { + /* Only the initial fragment + * is time stamped. + */ + tx_flags = 0; + } } if (skb == NULL) goto error; @@ -1317,6 +1331,9 @@ alloc_new_skb: /* reserve for fragmentation */ skb_reserve(skb, hh_len+sizeof(struct frag_hdr)); + if (sk->sk_type == SOCK_DGRAM) + skb_shinfo(skb)->tx_flags = tx_flags; + /* * Find where to start putting bytes */ -- cgit v1.1 From c8dcfd8a046c1f49af0c15726761af17b957962d Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 27 Feb 2011 22:08:00 +0100 Subject: cfg80211: add a field for the bitrate of the last rx data packet from a station Also fix a typo in the STATION_INFO_TX_BITRATE description Signed-off-by: Felix Fietkau Signed-off-by: John W. 
Linville --- net/wireless/nl80211.c | 56 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 864ddfb..4ebce42 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1968,13 +1968,41 @@ static int parse_station_flags(struct genl_info *info, return 0; } +static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, + int attr) +{ + struct nlattr *rate; + u16 bitrate; + + rate = nla_nest_start(msg, attr); + if (!rate) + goto nla_put_failure; + + /* cfg80211_calculate_bitrate will return 0 for mcs >= 32 */ + bitrate = cfg80211_calculate_bitrate(info); + if (bitrate > 0) + NLA_PUT_U16(msg, NL80211_RATE_INFO_BITRATE, bitrate); + + if (info->flags & RATE_INFO_FLAGS_MCS) + NLA_PUT_U8(msg, NL80211_RATE_INFO_MCS, info->mcs); + if (info->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH) + NLA_PUT_FLAG(msg, NL80211_RATE_INFO_40_MHZ_WIDTH); + if (info->flags & RATE_INFO_FLAGS_SHORT_GI) + NLA_PUT_FLAG(msg, NL80211_RATE_INFO_SHORT_GI); + + nla_nest_end(msg, rate); + return true; + +nla_put_failure: + return false; +} + static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo) { void *hdr; - struct nlattr *sinfoattr, *txrate; - u16 bitrate; + struct nlattr *sinfoattr; hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION); if (!hdr) @@ -2013,24 +2041,14 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL_AVG, sinfo->signal_avg); if (sinfo->filled & STATION_INFO_TX_BITRATE) { - txrate = nla_nest_start(msg, NL80211_STA_INFO_TX_BITRATE); - if (!txrate) + if (!nl80211_put_sta_rate(msg, &sinfo->txrate, + NL80211_STA_INFO_TX_BITRATE)) + goto nla_put_failure; + } + if (sinfo->filled & STATION_INFO_RX_BITRATE) { + if (!nl80211_put_sta_rate(msg, &sinfo->rxrate, + 
NL80211_STA_INFO_RX_BITRATE)) goto nla_put_failure; - - /* cfg80211_calculate_bitrate will return 0 for mcs >= 32 */ - bitrate = cfg80211_calculate_bitrate(&sinfo->txrate); - if (bitrate > 0) - NLA_PUT_U16(msg, NL80211_RATE_INFO_BITRATE, bitrate); - - if (sinfo->txrate.flags & RATE_INFO_FLAGS_MCS) - NLA_PUT_U8(msg, NL80211_RATE_INFO_MCS, - sinfo->txrate.mcs); - if (sinfo->txrate.flags & RATE_INFO_FLAGS_40_MHZ_WIDTH) - NLA_PUT_FLAG(msg, NL80211_RATE_INFO_40_MHZ_WIDTH); - if (sinfo->txrate.flags & RATE_INFO_FLAGS_SHORT_GI) - NLA_PUT_FLAG(msg, NL80211_RATE_INFO_SHORT_GI); - - nla_nest_end(msg, txrate); } if (sinfo->filled & STATION_INFO_RX_PACKETS) NLA_PUT_U32(msg, NL80211_STA_INFO_RX_PACKETS, -- cgit v1.1 From 3af6334c9e4fbf41ef0ebd3b4d5762f26b675c40 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 27 Feb 2011 22:08:01 +0100 Subject: mac80211: add support for showing the last rx bitrate Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 31 ++++++++++++++++++++++--------- net/mac80211/rx.c | 11 ++++++++++- net/mac80211/sta_info.h | 4 ++++ 3 files changed, 36 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 8b436c7..7b701dc 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -316,6 +316,17 @@ static int ieee80211_config_default_mgmt_key(struct wiphy *wiphy, return 0; } +static void rate_idx_to_bitrate(struct rate_info *rate, struct sta_info *sta, int idx) +{ + if (!(rate->flags & RATE_INFO_FLAGS_MCS)) { + struct ieee80211_supported_band *sband; + sband = sta->local->hw.wiphy->bands[ + sta->local->hw.conf.channel->band]; + rate->legacy = sband->bitrates[idx].bitrate; + } else + rate->mcs = idx; +} + static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = sta->sdata; @@ -330,6 +341,7 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) STATION_INFO_TX_RETRIES | 
STATION_INFO_TX_FAILED | STATION_INFO_TX_BITRATE | + STATION_INFO_RX_BITRATE | STATION_INFO_RX_DROP_MISC; sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); @@ -355,15 +367,16 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->txrate.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; if (sta->last_tx_rate.flags & IEEE80211_TX_RC_SHORT_GI) sinfo->txrate.flags |= RATE_INFO_FLAGS_SHORT_GI; - - if (!(sta->last_tx_rate.flags & IEEE80211_TX_RC_MCS)) { - struct ieee80211_supported_band *sband; - sband = sta->local->hw.wiphy->bands[ - sta->local->hw.conf.channel->band]; - sinfo->txrate.legacy = - sband->bitrates[sta->last_tx_rate.idx].bitrate; - } else - sinfo->txrate.mcs = sta->last_tx_rate.idx; + rate_idx_to_bitrate(&sinfo->txrate, sta, sta->last_tx_rate.idx); + + sinfo->rxrate.flags = 0; + if (sta->last_rx_rate_flag & RX_FLAG_HT) + sinfo->rxrate.flags |= RATE_INFO_FLAGS_MCS; + if (sta->last_rx_rate_flag & RX_FLAG_40MHZ) + sinfo->rxrate.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; + if (sta->last_rx_rate_flag & RX_FLAG_SHORT_GI) + sinfo->rxrate.flags |= RATE_INFO_FLAGS_SHORT_GI; + rate_idx_to_bitrate(&sinfo->rxrate, sta, sta->last_rx_rate_idx); if (ieee80211_vif_is_mesh(&sdata->vif)) { #ifdef CONFIG_MAC80211_MESH diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 5b53423..5c1930b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1156,14 +1156,23 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) if (rx->sdata->vif.type == NL80211_IFTYPE_ADHOC) { u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len, NL80211_IFTYPE_ADHOC); - if (compare_ether_addr(bssid, rx->sdata->u.ibss.bssid) == 0) + if (compare_ether_addr(bssid, rx->sdata->u.ibss.bssid) == 0) { sta->last_rx = jiffies; + if (ieee80211_is_data(hdr->frame_control)) { + sta->last_rx_rate_idx = status->rate_idx; + sta->last_rx_rate_flag = status->flag; + } + } } else if (!is_multicast_ether_addr(hdr->addr1)) { /* * Mesh beacons will update last_rx when if they are found to 
* match the current local configuration when processed. */ sta->last_rx = jiffies; + if (ieee80211_is_data(hdr->frame_control)) { + sta->last_rx_rate_idx = status->rate_idx; + sta->last_rx_rate_flag = status->flag; + } } if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index ca0b690..5768114 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -209,6 +209,8 @@ enum plink_state { * @rate_ctrl_priv: rate control private per-STA pointer * @last_tx_rate: rate used for last transmit, to report to userspace as * "the" transmit rate + * @last_rx_rate_idx: rx status rate index of the last data packet + * @last_rx_rate_flag: rx status flag of the last data packet * @lock: used for locking all fields that require locking, see comments * in the header file. * @flaglock: spinlock for flags accesses @@ -311,6 +313,8 @@ struct sta_info { unsigned long tx_bytes; unsigned long tx_fragments; struct ieee80211_tx_rate last_tx_rate; + int last_rx_rate_idx; + int last_rx_rate_flag; u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1]; /* -- cgit v1.1 From e46395a4b3d32d161d8b6d8e4a002972b1faae3e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 1 Mar 2011 17:18:26 +0100 Subject: mac80211: make rate control Kconfig warning depend on mac80211 ... Otherwise it is displayed when mac80211 isn't even turned on, which is completely pointless. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 4c57a9c..841dd1e 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -78,7 +78,7 @@ config MAC80211_RC_DEFAULT endif comment "Some wireless drivers require a rate control algorithm" - depends on MAC80211_HAS_RC=n + depends on MAC80211 && MAC80211_HAS_RC=n config MAC80211_MESH bool "Enable mac80211 mesh networking (pre-802.11s) support" -- cgit v1.1 From 5a2ef92023506d4e9cd13617b5a46b4d0f1b6747 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 1 Mar 2011 02:36:47 +0000 Subject: inet: Remove unused sk_sndmsg_* from UFO UFO doesn't really use the sk_sndmsg_* parameters so touching them is pointless. It can't use them anyway since the whole point of UFO is to use the original pages without copying. Signed-off-by: Herbert Xu Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 3 --- net/ipv4/ip_output.c | 1 - net/ipv6/ip6_output.c | 1 - 3 files changed, 5 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 14cf560..1eb526a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2434,8 +2434,6 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, return -ENOMEM; /* initialize the next frag */ - sk->sk_sndmsg_page = page; - sk->sk_sndmsg_off = 0; skb_fill_page_desc(skb, frg_cnt, page, 0, 0); skb->truesize += PAGE_SIZE; atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc); @@ -2455,7 +2453,6 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, return -EFAULT; /* copy was successful so update the size parameters */ - sk->sk_sndmsg_off += copy; frag->size += copy; skb->len += copy; skb->data_len += copy; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 04c7b3b..d3a4540 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -767,7 +767,6 @@ static inline int ip_ufo_append_data(struct sock 
*sk, skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; - sk->sk_sndmsg_off = 0; /* specify the length of each IP datagram fragment */ skb_shinfo(skb)->gso_size = mtu - fragheaderlen; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 065b3f7..5c618f2 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1061,7 +1061,6 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; - sk->sk_sndmsg_off = 0; } err = skb_append_datato_frags(sk,skb, getfrag, from, -- cgit v1.1 From 1470ddf7f8cecf776921e5ccee72e3d2b3d60cbc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 1 Mar 2011 02:36:47 +0000 Subject: inet: Remove explicit write references to sk/inet in ip_append_data In order to allow simultaneous calls to ip_append_data on the same socket, it must not modify any shared state in sk or inet (other than those that are designed to allow that such as atomic counters). This patch abstracts out write references to sk and inet_sk in ip_append_data and its friends so that we may use the underlying code in parallel. Signed-off-by: Herbert Xu Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/ip_output.c | 238 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 140 insertions(+), 98 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d3a4540..1dd5ecc 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -733,6 +733,7 @@ csum_page(struct page *page, int offset, int copy) } static inline int ip_ufo_append_data(struct sock *sk, + struct sk_buff_head *queue, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int hh_len, int fragheaderlen, @@ -745,7 +746,7 @@ static inline int ip_ufo_append_data(struct sock *sk, * device, so create one single skb packet containing complete * udp datagram */ - if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { + if ((skb = skb_peek_tail(queue)) == NULL) { skb = sock_alloc_send_skb(sk, hh_len + fragheaderlen + transhdrlen + 20, (flags & MSG_DONTWAIT), &err); @@ -771,35 +772,24 @@ static inline int ip_ufo_append_data(struct sock *sk, /* specify the length of each IP datagram fragment */ skb_shinfo(skb)->gso_size = mtu - fragheaderlen; skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - __skb_queue_tail(&sk->sk_write_queue, skb); + __skb_queue_tail(queue, skb); } return skb_append_datato_frags(sk, skb, getfrag, from, (length - transhdrlen)); } -/* - * ip_append_data() and ip_append_page() can make one large IP datagram - * from many pieces of data. Each pieces will be holded on the socket - * until ip_push_pending_frames() is called. Each piece can be a page - * or non-page data. - * - * Not only UDP, other transport protocols - e.g. raw sockets - can use - * this interface potentially. - * - * LATER: length must be adjusted by pad at tail, when it is required. 
- */ -int ip_append_data(struct sock *sk, - int getfrag(void *from, char *to, int offset, int len, - int odd, struct sk_buff *skb), - void *from, int length, int transhdrlen, - struct ipcm_cookie *ipc, struct rtable **rtp, - unsigned int flags) +static int __ip_append_data(struct sock *sk, struct sk_buff_head *queue, + struct inet_cork *cork, + int getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + unsigned int flags) { struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; - struct ip_options *opt = NULL; + struct ip_options *opt = inet->cork.opt; int hh_len; int exthdrlen; int mtu; @@ -808,58 +798,19 @@ int ip_append_data(struct sock *sk, int offset = 0; unsigned int maxfraglen, fragheaderlen; int csummode = CHECKSUM_NONE; - struct rtable *rt; - - if (flags&MSG_PROBE) - return 0; + struct rtable *rt = (struct rtable *)cork->dst; - if (skb_queue_empty(&sk->sk_write_queue)) { - /* - * setup for corking. - */ - opt = ipc->opt; - if (opt) { - if (inet->cork.opt == NULL) { - inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation); - if (unlikely(inet->cork.opt == NULL)) - return -ENOBUFS; - } - memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen); - inet->cork.flags |= IPCORK_OPT; - inet->cork.addr = ipc->addr; - } - rt = *rtp; - if (unlikely(!rt)) - return -EFAULT; - /* - * We steal reference to this route, caller should not release it - */ - *rtp = NULL; - inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? - rt->dst.dev->mtu : - dst_mtu(rt->dst.path); - inet->cork.dst = &rt->dst; - inet->cork.length = 0; - sk->sk_sndmsg_page = NULL; - sk->sk_sndmsg_off = 0; - exthdrlen = rt->dst.header_len; - length += exthdrlen; - transhdrlen += exthdrlen; - } else { - rt = (struct rtable *)inet->cork.dst; - if (inet->cork.flags & IPCORK_OPT) - opt = inet->cork.opt; + exthdrlen = transhdrlen ? 
rt->dst.header_len : 0; + length += exthdrlen; + transhdrlen += exthdrlen; + mtu = inet->cork.fragsize; - transhdrlen = 0; - exthdrlen = 0; - mtu = inet->cork.fragsize; - } hh_len = LL_RESERVED_SPACE(rt->dst.dev); fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; - if (inet->cork.length + length > 0xFFFF - fragheaderlen) { + if (cork->length + length > 0xFFFF - fragheaderlen) { ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, mtu-exthdrlen); return -EMSGSIZE; @@ -875,15 +826,15 @@ int ip_append_data(struct sock *sk, !exthdrlen) csummode = CHECKSUM_PARTIAL; - skb = skb_peek_tail(&sk->sk_write_queue); + skb = skb_peek_tail(queue); - inet->cork.length += length; + cork->length += length; if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO)) { - err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, - fragheaderlen, transhdrlen, mtu, - flags); + err = ip_ufo_append_data(sk, queue, getfrag, from, length, + hh_len, fragheaderlen, transhdrlen, + mtu, flags); if (err) goto error; return 0; @@ -960,7 +911,7 @@ alloc_new_skb: else /* only the initial fragment is time stamped */ - ipc->tx_flags = 0; + cork->tx_flags = 0; } if (skb == NULL) goto error; @@ -971,7 +922,7 @@ alloc_new_skb: skb->ip_summed = csummode; skb->csum = 0; skb_reserve(skb, hh_len); - skb_shinfo(skb)->tx_flags = ipc->tx_flags; + skb_shinfo(skb)->tx_flags = cork->tx_flags; /* * Find where to start putting bytes. @@ -1008,7 +959,7 @@ alloc_new_skb: /* * Put the packet on the pending queue. 
*/ - __skb_queue_tail(&sk->sk_write_queue, skb); + __skb_queue_tail(queue, skb); continue; } @@ -1028,8 +979,8 @@ alloc_new_skb: } else { int i = skb_shinfo(skb)->nr_frags; skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; - struct page *page = sk->sk_sndmsg_page; - int off = sk->sk_sndmsg_off; + struct page *page = cork->page; + int off = cork->off; unsigned int left; if (page && (left = PAGE_SIZE - off) > 0) { @@ -1041,7 +992,7 @@ alloc_new_skb: goto error; } get_page(page); - skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); + skb_fill_page_desc(skb, i, page, off, 0); frag = &skb_shinfo(skb)->frags[i]; } } else if (i < MAX_SKB_FRAGS) { @@ -1052,8 +1003,8 @@ alloc_new_skb: err = -ENOMEM; goto error; } - sk->sk_sndmsg_page = page; - sk->sk_sndmsg_off = 0; + cork->page = page; + cork->off = 0; skb_fill_page_desc(skb, i, page, 0, 0); frag = &skb_shinfo(skb)->frags[i]; @@ -1065,7 +1016,7 @@ alloc_new_skb: err = -EFAULT; goto error; } - sk->sk_sndmsg_off += copy; + cork->off += copy; frag->size += copy; skb->len += copy; skb->data_len += copy; @@ -1079,11 +1030,87 @@ alloc_new_skb: return 0; error: - inet->cork.length -= length; + cork->length -= length; IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); return err; } +static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, + struct ipcm_cookie *ipc, struct rtable **rtp) +{ + struct inet_sock *inet = inet_sk(sk); + struct ip_options *opt; + struct rtable *rt; + + /* + * setup for corking. 
+ */ + opt = ipc->opt; + if (opt) { + if (cork->opt == NULL) { + cork->opt = kmalloc(sizeof(struct ip_options) + 40, + sk->sk_allocation); + if (unlikely(cork->opt == NULL)) + return -ENOBUFS; + } + memcpy(cork->opt, opt, sizeof(struct ip_options) + opt->optlen); + cork->flags |= IPCORK_OPT; + cork->addr = ipc->addr; + } + rt = *rtp; + if (unlikely(!rt)) + return -EFAULT; + /* + * We steal reference to this route, caller should not release it + */ + *rtp = NULL; + cork->fragsize = inet->pmtudisc == IP_PMTUDISC_PROBE ? + rt->dst.dev->mtu : dst_mtu(rt->dst.path); + cork->dst = &rt->dst; + cork->length = 0; + cork->tx_flags = ipc->tx_flags; + cork->page = NULL; + cork->off = 0; + + return 0; +} + +/* + * ip_append_data() and ip_append_page() can make one large IP datagram + * from many pieces of data. Each pieces will be holded on the socket + * until ip_push_pending_frames() is called. Each piece can be a page + * or non-page data. + * + * Not only UDP, other transport protocols - e.g. raw sockets - can use + * this interface potentially. + * + * LATER: length must be adjusted by pad at tail, when it is required. 
+ */ +int ip_append_data(struct sock *sk, + int getfrag(void *from, char *to, int offset, int len, + int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + struct ipcm_cookie *ipc, struct rtable **rtp, + unsigned int flags) +{ + struct inet_sock *inet = inet_sk(sk); + int err; + + if (flags&MSG_PROBE) + return 0; + + if (skb_queue_empty(&sk->sk_write_queue)) { + err = ip_setup_cork(sk, &inet->cork, ipc, rtp); + if (err) + return err; + } else { + transhdrlen = 0; + } + + return __ip_append_data(sk, &sk->sk_write_queue, &inet->cork, getfrag, + from, length, transhdrlen, flags); +} + ssize_t ip_append_page(struct sock *sk, struct page *page, int offset, size_t size, int flags) { @@ -1227,40 +1254,42 @@ error: return err; } -static void ip_cork_release(struct inet_sock *inet) +static void ip_cork_release(struct inet_cork *cork) { - inet->cork.flags &= ~IPCORK_OPT; - kfree(inet->cork.opt); - inet->cork.opt = NULL; - dst_release(inet->cork.dst); - inet->cork.dst = NULL; + cork->flags &= ~IPCORK_OPT; + kfree(cork->opt); + cork->opt = NULL; + dst_release(cork->dst); + cork->dst = NULL; } /* * Combined all pending IP fragments on the socket as one IP datagram * and push them out. 
*/ -int ip_push_pending_frames(struct sock *sk) +static int __ip_push_pending_frames(struct sock *sk, + struct sk_buff_head *queue, + struct inet_cork *cork) { struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; struct inet_sock *inet = inet_sk(sk); struct net *net = sock_net(sk); struct ip_options *opt = NULL; - struct rtable *rt = (struct rtable *)inet->cork.dst; + struct rtable *rt = (struct rtable *)cork->dst; struct iphdr *iph; __be16 df = 0; __u8 ttl; int err = 0; - if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) + if ((skb = __skb_dequeue(queue)) == NULL) goto out; tail_skb = &(skb_shinfo(skb)->frag_list); /* move skb->data to ip header from ext header */ if (skb->data < skb_network_header(skb)) __skb_pull(skb, skb_network_offset(skb)); - while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { + while ((tmp_skb = __skb_dequeue(queue)) != NULL) { __skb_pull(tmp_skb, skb_network_header_len(skb)); *tail_skb = tmp_skb; tail_skb = &(tmp_skb->next); @@ -1286,8 +1315,8 @@ int ip_push_pending_frames(struct sock *sk) ip_dont_fragment(sk, &rt->dst))) df = htons(IP_DF); - if (inet->cork.flags & IPCORK_OPT) - opt = inet->cork.opt; + if (cork->flags & IPCORK_OPT) + opt = cork->opt; if (rt->rt_type == RTN_MULTICAST) ttl = inet->mc_ttl; @@ -1299,7 +1328,7 @@ int ip_push_pending_frames(struct sock *sk) iph->ihl = 5; if (opt) { iph->ihl += opt->optlen>>2; - ip_options_build(skb, opt, inet->cork.addr, rt, 0); + ip_options_build(skb, opt, cork->addr, rt, 0); } iph->tos = inet->tos; iph->frag_off = df; @@ -1315,7 +1344,7 @@ int ip_push_pending_frames(struct sock *sk) * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec * on dst refcount */ - inet->cork.dst = NULL; + cork->dst = NULL; skb_dst_set(skb, &rt->dst); if (iph->protocol == IPPROTO_ICMP) @@ -1332,7 +1361,7 @@ int ip_push_pending_frames(struct sock *sk) } out: - ip_cork_release(inet); + ip_cork_release(cork); return err; error: @@ -1340,17 +1369,30 @@ error: goto out; } +int 
ip_push_pending_frames(struct sock *sk) +{ + return __ip_push_pending_frames(sk, &sk->sk_write_queue, + &inet_sk(sk)->cork); +} + /* * Throw away all pending data on the socket. */ -void ip_flush_pending_frames(struct sock *sk) +static void __ip_flush_pending_frames(struct sock *sk, + struct sk_buff_head *queue, + struct inet_cork *cork) { struct sk_buff *skb; - while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) + while ((skb = __skb_dequeue_tail(queue)) != NULL) kfree_skb(skb); - ip_cork_release(inet_sk(sk)); + ip_cork_release(cork); +} + +void ip_flush_pending_frames(struct sock *sk) +{ + __ip_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork); } -- cgit v1.1 From 1c32c5ad6fac8cee1a77449f5abf211e911ff830 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 1 Mar 2011 02:36:47 +0000 Subject: inet: Add ip_make_skb and ip_finish_skb This patch adds the helper ip_make_skb which is like ip_append_data and ip_push_pending_frames all rolled into one, except that it does not send the skb produced. The sending part is carried out by ip_send_skb, which the transport protocol can call after it has tweaked the skb. It is meant to be called in cases where corking is not used and should have a one-to-one correspondence to sendmsg. This patch also adds the helper ip_finish_skb which is meant to replace ip_push_pending_frames when corking is required. Previously the protocol stack would peek at the socket write queue and add its header to the first packet. With ip_finish_skb, the protocol stack can directly operate on the final skb instead, just like the non-corking case with ip_make_skb. Signed-off-by: Herbert Xu Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/ip_output.c | 65 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 51 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 1dd5ecc..460308c 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1267,9 +1267,9 @@ static void ip_cork_release(struct inet_cork *cork) * Combined all pending IP fragments on the socket as one IP datagram * and push them out. */ -static int __ip_push_pending_frames(struct sock *sk, - struct sk_buff_head *queue, - struct inet_cork *cork) +struct sk_buff *__ip_make_skb(struct sock *sk, + struct sk_buff_head *queue, + struct inet_cork *cork) { struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; @@ -1280,7 +1280,6 @@ static int __ip_push_pending_frames(struct sock *sk, struct iphdr *iph; __be16 df = 0; __u8 ttl; - int err = 0; if ((skb = __skb_dequeue(queue)) == NULL) goto out; @@ -1351,28 +1350,37 @@ static int __ip_push_pending_frames(struct sock *sk, icmp_out_count(net, ((struct icmphdr *) skb_transport_header(skb))->type); - /* Netfilter gets whole the not fragmented skb. */ + ip_cork_release(cork); +out: + return skb; +} + +int ip_send_skb(struct sk_buff *skb) +{ + struct net *net = sock_net(skb->sk); + int err; + err = ip_local_out(skb); if (err) { if (err > 0) err = net_xmit_errno(err); if (err) - goto error; + IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); } -out: - ip_cork_release(cork); return err; - -error: - IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); - goto out; } int ip_push_pending_frames(struct sock *sk) { - return __ip_push_pending_frames(sk, &sk->sk_write_queue, - &inet_sk(sk)->cork); + struct sk_buff *skb; + + skb = ip_finish_skb(sk); + if (!skb) + return 0; + + /* Netfilter gets whole the not fragmented skb. 
*/ + return ip_send_skb(skb); } /* @@ -1395,6 +1403,35 @@ void ip_flush_pending_frames(struct sock *sk) __ip_flush_pending_frames(sk, &sk->sk_write_queue, &inet_sk(sk)->cork); } +struct sk_buff *ip_make_skb(struct sock *sk, + int getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + struct ipcm_cookie *ipc, struct rtable **rtp, + unsigned int flags) +{ + struct inet_cork cork = {}; + struct sk_buff_head queue; + int err; + + if (flags & MSG_PROBE) + return NULL; + + __skb_queue_head_init(&queue); + + err = ip_setup_cork(sk, &cork, ipc, rtp); + if (err) + return ERR_PTR(err); + + err = __ip_append_data(sk, &queue, &cork, getfrag, + from, length, transhdrlen, flags); + if (err) { + __ip_flush_pending_frames(sk, &queue, &cork); + return ERR_PTR(err); + } + + return __ip_make_skb(sk, &queue, &cork); +} /* * Fetch data from kernel space and fill in checksum if needed. -- cgit v1.1 From f6b9664f8b711cf4fd53e70aa0d21f72d5bf806c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 1 Mar 2011 02:36:48 +0000 Subject: udp: Switch to ip_finish_skb This patch converts UDP to use the new ip_finish_skb API. This would then allow us to more easily use ip_make_skb which allows UDP to run without a socket lock. Signed-off-by: Herbert Xu Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/udp.c | 83 +++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 50 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d37baaa..61c22ee 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -663,75 +663,72 @@ void udp_flush_pending_frames(struct sock *sk) EXPORT_SYMBOL(udp_flush_pending_frames); /** - * udp4_hwcsum_outgoing - handle outgoing HW checksumming - * @sk: socket we are sending on + * udp4_hwcsum - handle outgoing HW checksumming * @skb: sk_buff containing the filled-in UDP header * (checksum field must be zeroed out) + * @src: source IP address + * @dst: destination IP address */ -static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, - __be32 src, __be32 dst, int len) +static void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst) { - unsigned int offset; struct udphdr *uh = udp_hdr(skb); + struct sk_buff *frags = skb_shinfo(skb)->frag_list; + int offset = skb_transport_offset(skb); + int len = skb->len - offset; + int hlen = len; __wsum csum = 0; - if (skb_queue_len(&sk->sk_write_queue) == 1) { + if (!frags) { /* * Only one fragment on the socket. 
*/ skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); - uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); + uh->check = ~csum_tcpudp_magic(src, dst, len, + IPPROTO_UDP, 0); } else { /* * HW-checksum won't work as there are two or more * fragments on the socket so that all csums of sk_buffs * should be together */ - offset = skb_transport_offset(skb); - skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); + do { + csum = csum_add(csum, frags->csum); + hlen -= frags->len; + } while ((frags = frags->next)); + csum = skb_checksum(skb, offset, hlen, csum); skb->ip_summed = CHECKSUM_NONE; - skb_queue_walk(&sk->sk_write_queue, skb) { - csum = csum_add(csum, skb->csum); - } - uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; } } -/* - * Push out all pending data as one UDP datagram. Socket is locked. - */ -static int udp_push_pending_frames(struct sock *sk) +static int udp_send_skb(struct sk_buff *skb, __be32 daddr, __be32 dport) { - struct udp_sock *up = udp_sk(sk); + struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); - struct flowi *fl = &inet->cork.fl; - struct sk_buff *skb; struct udphdr *uh; + struct rtable *rt = (struct rtable *)skb_dst(skb); int err = 0; int is_udplite = IS_UDPLITE(sk); + int offset = skb_transport_offset(skb); + int len = skb->len - offset; __wsum csum = 0; - /* Grab the skbuff where UDP header space exists. 
*/ - if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) - goto out; - /* * Create a UDP header */ uh = udp_hdr(skb); - uh->source = fl->fl_ip_sport; - uh->dest = fl->fl_ip_dport; - uh->len = htons(up->len); + uh->source = inet->inet_sport; + uh->dest = dport; + uh->len = htons(len); uh->check = 0; if (is_udplite) /* UDP-Lite */ - csum = udplite_csum_outgoing(sk, skb); + csum = udplite_csum(skb); else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ @@ -740,20 +737,20 @@ static int udp_push_pending_frames(struct sock *sk) } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ - udp4_hwcsum_outgoing(sk, skb, fl->fl4_src, fl->fl4_dst, up->len); + udp4_hwcsum(skb, rt->rt_src, daddr); goto send; - } else /* `normal' UDP */ - csum = udp_csum_outgoing(sk, skb); + } else + csum = udp_csum(skb); /* add protocol-dependent pseudo-header */ - uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, + uh->check = csum_tcpudp_magic(rt->rt_src, daddr, len, sk->sk_protocol, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; send: - err = ip_push_pending_frames(sk); + err = ip_send_skb(skb); if (err) { if (err == -ENOBUFS && !inet->recverr) { UDP_INC_STATS_USER(sock_net(sk), @@ -763,6 +760,26 @@ send: } else UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_OUTDATAGRAMS, is_udplite); + return err; +} + +/* + * Push out all pending data as one UDP datagram. Socket is locked. 
+ */ +static int udp_push_pending_frames(struct sock *sk) +{ + struct udp_sock *up = udp_sk(sk); + struct inet_sock *inet = inet_sk(sk); + struct flowi *fl = &inet->cork.fl; + struct sk_buff *skb; + int err = 0; + + skb = ip_finish_skb(sk); + if (!skb) + goto out; + + err = udp_send_skb(skb, fl->fl4_dst, fl->fl_ip_dport); + out: up->len = 0; up->pending = 0; -- cgit v1.1 From 903ab86d195cca295379699299c5fc10beba31c7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 1 Mar 2011 02:36:48 +0000 Subject: udp: Add lockless transmit path The UDP transmit path has been running under the socket lock for a long time because of the corking feature. This means that transmitting to the same socket in multiple threads does not scale at all. However, as most users don't actually use corking, the locking can be removed in the common case. This patch creates a lockless fast path where corking is not used. Please note that this does create a slight inaccuracy in the enforcement of socket send buffer limits. In particular, we may exceed the socket limit by up to (number of CPUs) * (packet size) because of the way the limit is computed. As the primary purpose of socket buffers is to indicate congestion, this should not be a great problem for now. Signed-off-by: Herbert Xu Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv4/udp.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 61c22ee..8155d6e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -802,6 +802,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, int err, is_udplite = IS_UDPLITE(sk); int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); + struct sk_buff *skb; if (len > 0xFFFF) return -EMSGSIZE; @@ -816,6 +817,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.opt = NULL; ipc.tx_flags = 0; + getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; + if (up->pending) { /* * There are pending frames. @@ -940,6 +943,17 @@ back_from_confirm: if (!ipc.addr) daddr = ipc.addr = rt->rt_dst; + /* Lockless fast path for the non-corking case. */ + if (!corkreq) { + skb = ip_make_skb(sk, getfrag, msg->msg_iov, ulen, + sizeof(struct udphdr), &ipc, &rt, + msg->msg_flags); + err = PTR_ERR(skb); + if (skb && !IS_ERR(skb)) + err = udp_send_skb(skb, daddr, dport); + goto out; + } + lock_sock(sk); if (unlikely(up->pending)) { /* The socket is already corked while preparing it. */ @@ -961,7 +975,6 @@ back_from_confirm: do_append_data: up->len += ulen; - getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, sizeof(struct udphdr), &ipc, &rt, corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); -- cgit v1.1 From 68d0c6d34d586a893292d4fb633a3bf8c547b222 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 1 Mar 2011 13:19:07 -0800 Subject: ipv6: Consolidate route lookup sequences. Route lookups follow a general pattern in the ipv6 code wherein we first find the non-IPSEC route, potentially override the flow destination address due to ipv6 options settings, and then finally make an IPSEC search using either xfrm_lookup() or __xfrm_lookup(). 
__xfrm_lookup() is used when we want to generate a blackhole route if the key manager needs to resolve the IPSEC rules (in this case -EREMOTE is returned and the original 'dst' is left unchanged). Otherwise plain xfrm_lookup() is used and when asynchronous IPSEC resolution is necessary, we simply fail the lookup completely. All of these cases are encapsulated into two routines, ip6_dst_lookup_flow and ip6_sk_dst_lookup_flow. The latter of which handles unconnected UDP datagram sockets. Signed-off-by: David S. Miller --- net/dccp/ipv6.c | 65 ++++++++++---------------------- net/ipv6/af_inet6.c | 17 +++------ net/ipv6/datagram.c | 15 ++------ net/ipv6/inet6_connection_sock.c | 25 +++---------- net/ipv6/ip6_output.c | 80 ++++++++++++++++++++++++++++++++++------ net/ipv6/raw.c | 15 ++------ net/ipv6/syncookies.c | 7 +--- net/ipv6/tcp_ipv6.c | 57 +++++++++++----------------- net/ipv6/udp.c | 15 ++------ 9 files changed, 134 insertions(+), 162 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 460d545..5efc57f 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -162,15 +162,9 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl.fl_ip_sport = inet->inet_sport; security_sk_classify_flow(sk, &fl); - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) { - sk->sk_err_soft = -err; - goto out; - } - - err = xfrm_lookup(net, &dst, &fl, sk, 0); - if (err < 0) { - sk->sk_err_soft = -err; + dst = ip6_dst_lookup_flow(sk, &fl, NULL, false); + if (IS_ERR(dst)) { + sk->sk_err_soft = -PTR_ERR(dst); goto out; } } else @@ -267,16 +261,12 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, final_p = fl6_update_dst(&fl, opt, &final); - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) - goto done; - - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0); - if (err < 0) + dst = ip6_dst_lookup_flow(sk, &fl, final_p, false); + if (IS_ERR(dst)) { + err = 
PTR_ERR(dst); + dst = NULL; goto done; + } skb = dccp_make_response(sk, dst, req); if (skb != NULL) { @@ -338,14 +328,13 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) security_skb_classify_flow(rxskb, &fl); /* sk = NULL, but it is safe for now. RST socket required. */ - if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) { - if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) { - skb_dst_set(skb, dst); - ip6_xmit(ctl_sk, skb, &fl, NULL); - DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); - DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); - return; - } + dst = ip6_dst_lookup_flow(ctl_sk, &fl, NULL, false); + if (!IS_ERR(dst)) { + skb_dst_set(skb, dst); + ip6_xmit(ctl_sk, skb, &fl, NULL); + DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); + DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); + return; } kfree_skb(skb); @@ -550,13 +539,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, fl.fl_ip_sport = inet_rsk(req)->loc_port; security_sk_classify_flow(sk, &fl); - if (ip6_dst_lookup(sk, &dst, &fl)) - goto out; - - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) + dst = ip6_dst_lookup_flow(sk, &fl, final_p, false); + if (IS_ERR(dst)) goto out; } @@ -979,19 +963,10 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, final_p = fl6_update_dst(&fl, np->opt, &final); - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) + dst = ip6_dst_lookup_flow(sk, &fl, final_p, true); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); goto failure; - - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); - if (err < 0) { - if (err == -EREMOTE) - err = ip6_dst_blackhole(sk, &dst, &fl); - if (err < 0) - goto failure; } if (saddr == NULL) { diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3194aa9..a88b2e9 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -644,9 +644,8 @@ EXPORT_SYMBOL(inet6_unregister_protosw); int 
inet6_sk_rebuild_header(struct sock *sk) { - int err; - struct dst_entry *dst; struct ipv6_pinfo *np = inet6_sk(sk); + struct dst_entry *dst; dst = __sk_dst_check(sk, np->dst_cookie); @@ -668,17 +667,11 @@ int inet6_sk_rebuild_header(struct sock *sk) final_p = fl6_update_dst(&fl, np->opt, &final); - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) { + dst = ip6_dst_lookup_flow(sk, &fl, final_p, false); + if (IS_ERR(dst)) { sk->sk_route_caps = 0; - return err; - } - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) { - sk->sk_err_soft = -err; - return err; + sk->sk_err_soft = -PTR_ERR(dst); + return PTR_ERR(dst); } __ip6_dst_store(sk, dst, NULL, NULL); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 320bdb8..be3a781 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -162,18 +162,11 @@ ipv4_connected: opt = flowlabel ? flowlabel->opt : np->opt; final_p = fl6_update_dst(&fl, opt, &final); - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) + dst = ip6_dst_lookup_flow(sk, &fl, final_p, true); + err = 0; + if (IS_ERR(dst)) { + err = PTR_ERR(dst); goto out; - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); - if (err < 0) { - if (err == -EREMOTE) - err = ip6_dst_blackhole(sk, &dst, &fl); - if (err < 0) - goto out; } /* source address lookup done in ip6_dst_lookup */ diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index d144e62..d687e13 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -74,13 +74,8 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, fl.fl_ip_sport = inet_rsk(req)->loc_port; security_req_classify_flow(req, &fl); - if (ip6_dst_lookup(sk, &dst, &fl)) - return NULL; - - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) + dst = ip6_dst_lookup_flow(sk, &fl, 
final_p, false); + if (IS_ERR(dst)) return NULL; return dst; @@ -234,21 +229,13 @@ int inet6_csk_xmit(struct sk_buff *skb) dst = __inet6_csk_dst_check(sk, np->dst_cookie); if (dst == NULL) { - int err = ip6_dst_lookup(sk, &dst, &fl); - - if (err) { - sk->sk_err_soft = -err; - kfree_skb(skb); - return err; - } - - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); + dst = ip6_dst_lookup_flow(sk, &fl, final_p, false); - if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) { + if (IS_ERR(dst)) { + sk->sk_err_soft = -PTR_ERR(dst); sk->sk_route_caps = 0; kfree_skb(skb); - return err; + return PTR_ERR(dst); } __inet6_csk_dst_store(sk, dst, NULL, NULL); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5c618f2..28209b2 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1002,29 +1002,87 @@ int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) EXPORT_SYMBOL_GPL(ip6_dst_lookup); /** - * ip6_sk_dst_lookup - perform socket cached route lookup on flow + * ip6_dst_lookup_flow - perform route lookup on flow with ipsec + * @sk: socket which provides route info + * @fl: flow to lookup + * @final_dst: final destination address for ipsec lookup + * @want_blackhole: IPSEC blackhole handling desired + * + * This function performs a route lookup on the given flow. + * + * It returns a valid dst pointer on success, or a pointer encoded + * error code. 
+ */ +struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi *fl, + const struct in6_addr *final_dst, + bool want_blackhole) +{ + struct dst_entry *dst = NULL; + int err; + + err = ip6_dst_lookup_tail(sk, &dst, fl); + if (err) + return ERR_PTR(err); + if (final_dst) + ipv6_addr_copy(&fl->fl6_dst, final_dst); + if (want_blackhole) { + err = __xfrm_lookup(sock_net(sk), &dst, fl, sk, XFRM_LOOKUP_WAIT); + if (err == -EREMOTE) + err = ip6_dst_blackhole(sk, &dst, fl); + if (err) + return ERR_PTR(err); + } else { + err = xfrm_lookup(sock_net(sk), &dst, fl, sk, 0); + if (err) + return ERR_PTR(err); + } + return dst; +} +EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); + +/** + * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow * @sk: socket which provides the dst cache and route info - * @dst: pointer to dst_entry * for result * @fl: flow to lookup + * @final_dst: final destination address for ipsec lookup + * @want_blackhole: IPSEC blackhole handling desired * * This function performs a route lookup on the given flow with the * possibility of using the cached route in the socket if it is valid. * It will take the socket dst lock when operating on the dst cache. * As a result, this function can only be used in process context. * - * It returns zero on success, or a standard errno code on error. + * It returns a valid dst pointer on success, or a pointer encoded + * error code. 
*/ -int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi *fl, + const struct in6_addr *final_dst, + bool want_blackhole) { - *dst = NULL; - if (sk) { - *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); - *dst = ip6_sk_dst_check(sk, *dst, fl); - } + struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); + int err; - return ip6_dst_lookup_tail(sk, dst, fl); + dst = ip6_sk_dst_check(sk, dst, fl); + + err = ip6_dst_lookup_tail(sk, &dst, fl); + if (err) + return ERR_PTR(err); + if (final_dst) + ipv6_addr_copy(&fl->fl6_dst, final_dst); + if (want_blackhole) { + err = __xfrm_lookup(sock_net(sk), &dst, fl, sk, XFRM_LOOKUP_WAIT); + if (err == -EREMOTE) + err = ip6_dst_blackhole(sk, &dst, fl); + if (err) + return ERR_PTR(err); + } else { + err = xfrm_lookup(sock_net(sk), &dst, fl, sk, 0); + if (err) + return ERR_PTR(err); + } + return dst; } -EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup); +EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); static inline int ip6_ufo_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 364e866..dc29b07 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -856,20 +856,11 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, fl.oif = np->mcast_oif; security_sk_classify_flow(sk, &fl); - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) + dst = ip6_dst_lookup_flow(sk, &fl, final_p, true); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); goto out; - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); - if (err < 0) { - if (err == -EREMOTE) - err = ip6_dst_blackhole(sk, &dst, &fl); - if (err < 0) - goto out; } - if (hlimit < 0) { if (ipv6_addr_is_multicast(&fl.fl6_dst)) hlimit = np->mcast_hops; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 09fd34f..0b4cf35 100644 --- 
a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -243,12 +243,9 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->inet_sport; security_req_classify_flow(req, &fl); - if (ip6_dst_lookup(sk, &dst, &fl)) - goto out_free; - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) + dst = ip6_dst_lookup_flow(sk, &fl, final_p, false); + if (IS_ERR(dst)) goto out_free; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1d0ab55..e59a31c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -255,18 +255,10 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, security_sk_classify_flow(sk, &fl); - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) + dst = ip6_dst_lookup_flow(sk, &fl, final_p, true); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); goto failure; - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); - if (err < 0) { - if (err == -EREMOTE) - err = ip6_dst_blackhole(sk, &dst, &fl); - if (err < 0) - goto failure; } if (saddr == NULL) { @@ -385,7 +377,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); if (type == ICMPV6_PKT_TOOBIG) { - struct dst_entry *dst = NULL; + struct dst_entry *dst; if (sock_owned_by_user(sk)) goto out; @@ -413,13 +405,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl.fl_ip_sport = inet->inet_sport; security_skb_classify_flow(skb, &fl); - if ((err = ip6_dst_lookup(sk, &dst, &fl))) { - sk->sk_err_soft = -err; - goto out; - } - - if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0) { - sk->sk_err_soft = -err; + dst = ip6_dst_lookup_flow(sk, &fl, NULL, false); + if (IS_ERR(dst)) { + sk->sk_err_soft = -PTR_ERR(dst); goto out; } @@ -496,7 +484,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, struct 
in6_addr * final_p, final; struct flowi fl; struct dst_entry *dst; - int err = -1; + int err; memset(&fl, 0, sizeof(fl)); fl.proto = IPPROTO_TCP; @@ -512,15 +500,13 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, opt = np->opt; final_p = fl6_update_dst(&fl, opt, &final); - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) + dst = ip6_dst_lookup_flow(sk, &fl, final_p, false); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); goto done; - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) - goto done; - + } skb = tcp_make_synack(sk, dst, req, rvp); + err = -ENOMEM; if (skb) { __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); @@ -1079,15 +1065,14 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, * Underlying function will use this to retrieve the network * namespace */ - if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) { - if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) { - skb_dst_set(buff, dst); - ip6_xmit(ctl_sk, buff, &fl, NULL); - TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); - if (rst) - TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); - return; - } + dst = ip6_dst_lookup_flow(ctl_sk, &fl, NULL, false); + if (!IS_ERR(dst)) { + skb_dst_set(buff, dst); + ip6_xmit(ctl_sk, buff, &fl, NULL); + TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); + if (rst) + TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); + return; } kfree_skb(buff); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index a419a78..d86d7f6 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1125,18 +1125,11 @@ do_udp_sendmsg: security_sk_classify_flow(sk, &fl); - err = ip6_sk_dst_lookup(sk, &dst, &fl); - if (err) + dst = ip6_sk_dst_lookup_flow(sk, &fl, final_p, true); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + dst = NULL; goto out; - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); - if (err < 0) { - if (err == -EREMOTE) - err = 
ip6_dst_blackhole(sk, &dst, &fl); - if (err < 0) - goto out; } if (hlimit < 0) { -- cgit v1.1 From abdf7e7239da270e68262728f125ea94b9b7d42d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 1 Mar 2011 14:15:24 -0800 Subject: ipv4: Change final ip_route_connect() arg to boolean "can_sleep". Since that's what the current vague "flags" thing means. Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv4/datagram.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/l2tp/l2tp_ip.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 9379891..8372d5c 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -69,7 +69,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_DCCP, - orig_sport, orig_dport, sk, 1); + orig_sport, orig_dport, sk, true); if (tmp < 0) return tmp; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 7ceb804..d16687d 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1115,7 +1115,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, sk->sk_protocol, - inet->inet_sport, inet->inet_dport, sk, 0); + inet->inet_sport, inet->inet_dport, sk, false); if (err) return err; diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 174be6c..eaee1ed 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -49,7 +49,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr, RT_CONN_FLAGS(sk), oif, sk->sk_protocol, - inet->inet_sport, usin->sin_port, sk, 1); + inet->inet_sport, usin->sin_port, sk, true); if (err) { if (err == -ENETUNREACH) IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 27a0cc8..05bc6d9 100644 ---
a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -173,7 +173,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_TCP, - orig_sport, orig_dport, sk, 1); + orig_sport, orig_dport, sk, true); if (tmp < 0) { if (tmp == -ENETUNREACH) IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 110efb7..28e876a 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -323,7 +323,7 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len rc = ip_route_connect(&rt, lsa->l2tp_addr.s_addr, saddr, RT_CONN_FLAGS(sk), oif, IPPROTO_L2TP, - 0, 0, sk, 1); + 0, 0, sk, true); if (rc) { if (rc == -ENETUNREACH) IP_INC_STATS_BH(&init_net, IPSTATS_MIB_OUTNOROUTES); -- cgit v1.1 From 420d44daa7aa1cc847e9e527f0a27a9ce61768ca Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 1 Mar 2011 14:19:23 -0800 Subject: ipv4: Make final arg to ip_route_output_flow to be boolean "can_sleep" Since that is what the current vague "flags" argument means. Signed-off-by: David S. 
Miller --- net/dccp/ipv4.c | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/ip_output.c | 2 +- net/ipv4/raw.c | 2 +- net/ipv4/route.c | 6 +++--- net/ipv4/udp.c | 2 +- net/l2tp/l2tp_ip.c | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 8372d5c..3d4b82f 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -475,7 +475,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, }; security_skb_classify_flow(skb, &fl); - if (ip_route_output_flow(net, &rt, &fl, sk, 0)) { + if (ip_route_output_flow(net, &rt, &fl, sk, false)) { IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index d16687d..7d90fe0 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1174,7 +1174,7 @@ int inet_sk_rebuild_header(struct sock *sk) }; security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0); + err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, false); } if (!err) sk_setup_caps(sk, &rt->dst); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 97e5fb7..0caeb69 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -369,7 +369,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct net *net = sock_net(sk); security_req_classify_flow(req, &fl); - if (ip_route_output_flow(net, &rt, &fl, sk, 0)) + if (ip_route_output_flow(net, &rt, &fl, sk, false)) goto no_route; if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) goto route_err; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 460308c..e6905c5 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -355,7 +355,7 @@ int ip_queue_xmit(struct sk_buff *skb) * itself out. 
*/ security_sk_classify_flow(sk, &fl); - if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) + if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, false)) goto no_route; } sk_setup_caps(sk, &rt->dst); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 6390ba2..e185765 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -563,7 +563,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1); + err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, true); } if (err) goto done; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 52b077d..1ac3eca 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2720,7 +2720,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi } int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, - struct sock *sk, int flags) + struct sock *sk, bool can_sleep) { int err; @@ -2733,7 +2733,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, if (!flp->fl4_dst) flp->fl4_dst = (*rp)->rt_dst; err = __xfrm_lookup(net, (struct dst_entry **)rp, flp, sk, - flags ? XFRM_LOOKUP_WAIT : 0); + can_sleep ? 
XFRM_LOOKUP_WAIT : 0); if (err == -EREMOTE) err = ipv4_dst_blackhole(net, rp, flp); @@ -2746,7 +2746,7 @@ EXPORT_SYMBOL_GPL(ip_route_output_flow); int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp) { - return ip_route_output_flow(net, rp, flp, NULL, 0); + return ip_route_output_flow(net, rp, flp, NULL, false); } EXPORT_SYMBOL(ip_route_output_key); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8155d6e..790187b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -920,7 +920,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct net *net = sock_net(sk); security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(net, &rt, &fl, sk, 1); + err = ip_route_output_flow(net, &rt, &fl, sk, true); if (err) { if (err == -ENETUNREACH) IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 28e876a..7744a8e 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -489,7 +489,7 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m * itself out. */ security_sk_classify_flow(sk, &fl); - if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) + if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, false)) goto no_route; } sk_setup_caps(sk, &rt->dst); -- cgit v1.1 From 5df65e5567a497a28067019b8ff08f98fb026629 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 1 Mar 2011 14:22:19 -0800 Subject: net: Add FLOWI_FLAG_CAN_SLEEP. And set it in contexts where the route resolution can sleep. Signed-off-by: David S. Miller --- net/ipv4/raw.c | 3 ++- net/ipv4/udp.c | 6 ++++-- net/ipv6/ip6_output.c | 2 ++ 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index e185765..e8e8613 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -555,7 +555,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, .fl4_tos = tos, .proto = inet->hdrincl ?
IPPROTO_RAW : sk->sk_protocol, - }; + .flags = FLOWI_FLAG_CAN_SLEEP, + }; if (!inet->hdrincl) { err = raw_probe_proto_opt(&fl, msg); if (err) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 790187b..c6bcc93 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -914,9 +914,11 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, .fl4_src = saddr, .fl4_tos = tos, .proto = sk->sk_protocol, - .flags = inet_sk_flowi_flags(sk), + .flags = (inet_sk_flowi_flags(sk) | + FLOWI_FLAG_CAN_SLEEP), .fl_ip_sport = inet->inet_sport, - .fl_ip_dport = dport }; + .fl_ip_dport = dport + }; struct net *net = sock_net(sk); security_sk_classify_flow(sk, &fl); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 28209b2..77b1942 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1026,6 +1026,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi *fl, if (final_dst) ipv6_addr_copy(&fl->fl6_dst, final_dst); if (want_blackhole) { + fl->flags |= FLOWI_FLAG_CAN_SLEEP; err = __xfrm_lookup(sock_net(sk), &dst, fl, sk, XFRM_LOOKUP_WAIT); if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, fl); @@ -1070,6 +1071,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi *fl, if (final_dst) ipv6_addr_copy(&fl->fl6_dst, final_dst); if (want_blackhole) { + fl->flags |= FLOWI_FLAG_CAN_SLEEP; err = __xfrm_lookup(sock_net(sk), &dst, fl, sk, XFRM_LOOKUP_WAIT); if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, fl); -- cgit v1.1 From 273447b352e69c327efdecfd6e1d6fe3edbdcd14 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 1 Mar 2011 14:27:04 -0800 Subject: ipv4: Kill can_sleep arg to ip_route_output_flow() This boolean state is now available in the flow flags. Signed-off-by: David S. 
Miller --- net/dccp/ipv4.c | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/ip_output.c | 2 +- net/ipv4/raw.c | 2 +- net/ipv4/route.c | 7 ++++--- net/ipv4/udp.c | 2 +- net/l2tp/l2tp_ip.c | 2 +- 8 files changed, 11 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 3d4b82f..a8ff955 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -475,7 +475,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, }; security_skb_classify_flow(skb, &fl); - if (ip_route_output_flow(net, &rt, &fl, sk, false)) { + if (ip_route_output_flow(net, &rt, &fl, sk)) { IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 7d90fe0..44513bb 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1174,7 +1174,7 @@ int inet_sk_rebuild_header(struct sock *sk) }; security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, false); + err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk); } if (!err) sk_setup_caps(sk, &rt->dst); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 0caeb69..7f85d4a 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -369,7 +369,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct net *net = sock_net(sk); security_req_classify_flow(req, &fl); - if (ip_route_output_flow(net, &rt, &fl, sk, false)) + if (ip_route_output_flow(net, &rt, &fl, sk)) goto no_route; if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) goto route_err; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e6905c5..68dbe2d 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -355,7 +355,7 @@ int ip_queue_xmit(struct sk_buff *skb) * itself out. 
*/ security_sk_classify_flow(sk, &fl); - if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, false)) + if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk)) goto no_route; } sk_setup_caps(sk, &rt->dst); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index e8e8613..d7a2d1e 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -564,7 +564,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, true); + err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk); } if (err) goto done; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 1ac3eca..7846265 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2720,7 +2720,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi } int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, - struct sock *sk, bool can_sleep) + struct sock *sk) { int err; @@ -2733,7 +2733,8 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, if (!flp->fl4_dst) flp->fl4_dst = (*rp)->rt_dst; err = __xfrm_lookup(net, (struct dst_entry **)rp, flp, sk, - can_sleep ? XFRM_LOOKUP_WAIT : 0); + ((flp->flags & FLOWI_FLAG_CAN_SLEEP) ? 
+ XFRM_LOOKUP_WAIT : 0)); if (err == -EREMOTE) err = ipv4_dst_blackhole(net, rp, flp); @@ -2746,7 +2747,7 @@ EXPORT_SYMBOL_GPL(ip_route_output_flow); int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp) { - return ip_route_output_flow(net, rp, flp, NULL, false); + return ip_route_output_flow(net, rp, flp, NULL); } EXPORT_SYMBOL(ip_route_output_key); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c6bcc93..ed9a5b7 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -922,7 +922,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct net *net = sock_net(sk); security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(net, &rt, &fl, sk, true); + err = ip_route_output_flow(net, &rt, &fl, sk); if (err) { if (err == -ENETUNREACH) IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 7744a8e..5381ceb 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -489,7 +489,7 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m * itself out. */ security_sk_classify_flow(sk, &fl); - if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, false)) + if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk)) goto no_route; } sk_setup_caps(sk, &rt->dst); -- cgit v1.1 From a1414715f0ac905fb4b3a158ff6548d37bbe6165 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 1 Mar 2011 14:32:04 -0800 Subject: ipv6: Change final dst lookup arg name to "can_sleep" Since it indicates whether we are invoked from a sleepable context or not. Signed-off-by: David S. 
Miller --- net/ipv6/ip6_output.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 77b1942..b5f8769 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1006,7 +1006,7 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup); * @sk: socket which provides route info * @fl: flow to lookup * @final_dst: final destination address for ipsec lookup - * @want_blackhole: IPSEC blackhole handling desired + * @can_sleep: we are in a sleepable context * * This function performs a route lookup on the given flow. * @@ -1015,7 +1015,7 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup); */ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi *fl, const struct in6_addr *final_dst, - bool want_blackhole) + bool can_sleep) { struct dst_entry *dst = NULL; int err; @@ -1025,7 +1025,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi *fl, return ERR_PTR(err); if (final_dst) ipv6_addr_copy(&fl->fl6_dst, final_dst); - if (want_blackhole) { + if (can_sleep) { fl->flags |= FLOWI_FLAG_CAN_SLEEP; err = __xfrm_lookup(sock_net(sk), &dst, fl, sk, XFRM_LOOKUP_WAIT); if (err == -EREMOTE) @@ -1046,7 +1046,7 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); * @sk: socket which provides the dst cache and route info * @fl: flow to lookup * @final_dst: final destination address for ipsec lookup - * @want_blackhole: IPSEC blackhole handling desired + * @can_sleep: we are in a sleepable context * * This function performs a route lookup on the given flow with the * possibility of using the cached route in the socket if it is valid. 
@@ -1058,7 +1058,7 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); */ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi *fl, const struct in6_addr *final_dst, - bool want_blackhole) + bool can_sleep) { struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); int err; @@ -1070,7 +1070,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi *fl, return ERR_PTR(err); if (final_dst) ipv6_addr_copy(&fl->fl6_dst, final_dst); - if (want_blackhole) { + if (can_sleep) { fl->flags |= FLOWI_FLAG_CAN_SLEEP; err = __xfrm_lookup(sock_net(sk), &dst, fl, sk, XFRM_LOOKUP_WAIT); if (err == -EREMOTE) -- cgit v1.1 From 80c0bc9e37adfc892af82cb6aa8cace79f8a96cb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 1 Mar 2011 14:36:37 -0800 Subject: xfrm: Kill XFRM_LOOKUP_WAIT flag. This can be determined from the flow flags instead. Signed-off-by: David S. Miller --- net/decnet/dn_route.c | 5 +++-- net/ipv4/route.c | 4 +--- net/ipv6/ip6_output.c | 4 ++-- net/xfrm/xfrm_policy.c | 2 +- 4 files changed, 7 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 06c054d..0877147 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1233,8 +1233,9 @@ int dn_route_output_sock(struct dst_entry **pprt, struct flowi *fl, struct sock err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD); if (err == 0 && fl->proto) { - err = xfrm_lookup(&init_net, pprt, fl, sk, - (flags & MSG_DONTWAIT) ? 
0 : XFRM_LOOKUP_WAIT); + if (!(flags & MSG_DONTWAIT)) + fl->flags |= FLOWI_FLAG_CAN_SLEEP; + err = xfrm_lookup(&init_net, pprt, fl, sk, 0); } return err; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 7846265..23d2050 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2732,9 +2732,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, flp->fl4_src = (*rp)->rt_src; if (!flp->fl4_dst) flp->fl4_dst = (*rp)->rt_dst; - err = __xfrm_lookup(net, (struct dst_entry **)rp, flp, sk, - ((flp->flags & FLOWI_FLAG_CAN_SLEEP) ? - XFRM_LOOKUP_WAIT : 0)); + err = __xfrm_lookup(net, (struct dst_entry **)rp, flp, sk, 0); if (err == -EREMOTE) err = ipv4_dst_blackhole(net, rp, flp); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index b5f8769..faf7b9d 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1027,7 +1027,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi *fl, ipv6_addr_copy(&fl->fl6_dst, final_dst); if (can_sleep) { fl->flags |= FLOWI_FLAG_CAN_SLEEP; - err = __xfrm_lookup(sock_net(sk), &dst, fl, sk, XFRM_LOOKUP_WAIT); + err = __xfrm_lookup(sock_net(sk), &dst, fl, sk, 0); if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, fl); if (err) @@ -1072,7 +1072,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi *fl, ipv6_addr_copy(&fl->fl6_dst, final_dst); if (can_sleep) { fl->flags |= FLOWI_FLAG_CAN_SLEEP; - err = __xfrm_lookup(sock_net(sk), &dst, fl, sk, XFRM_LOOKUP_WAIT); + err = __xfrm_lookup(sock_net(sk), &dst, fl, sk, 0); if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, fl); if (err) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 41a91d2..f4c7467 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1831,7 +1831,7 @@ restart: XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); return -EREMOTE; } - if (flags & XFRM_LOOKUP_WAIT) { + if (fl->flags & FLOWI_FLAG_CAN_SLEEP) { DECLARE_WAITQUEUE(wait, current); 
add_wait_queue(&net->xfrm.km_waitq, &wait); -- cgit v1.1 From 69ead7afdf6028184f713a77376ee26f8aaafdcd Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 1 Mar 2011 14:45:33 -0800 Subject: ipv6: Normalize arguments to ip6_dst_blackhole(). Return a dst pointer which is potentially error encoded. Don't pass original dst pointer by reference, pass a struct net instead of a socket, and elide the flow argument since it is unnecessary. Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 4 ++-- net/ipv6/route.c | 12 +++++------- 2 files changed, 7 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index faf7b9d..ac16f3b 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1029,7 +1029,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi *fl, fl->flags |= FLOWI_FLAG_CAN_SLEEP; err = __xfrm_lookup(sock_net(sk), &dst, fl, sk, 0); if (err == -EREMOTE) - err = ip6_dst_blackhole(sk, &dst, fl); + return ip6_dst_blackhole(sock_net(sk), dst); if (err) return ERR_PTR(err); } else { @@ -1074,7 +1074,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi *fl, fl->flags |= FLOWI_FLAG_CAN_SLEEP; err = __xfrm_lookup(sock_net(sk), &dst, fl, sk, 0); if (err == -EREMOTE) - err = ip6_dst_blackhole(sk, &dst, fl); + return ip6_dst_blackhole(sock_net(sk), dst); if (err) return ERR_PTR(err); } else { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7e9443f..cf6fdea 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -870,11 +870,10 @@ struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, EXPORT_SYMBOL(ip6_route_output); -int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl) +struct dst_entry *ip6_dst_blackhole(struct net *net, struct dst_entry *dst_orig) { - struct rt6_info *ort = (struct rt6_info *) *dstp; - struct rt6_info *rt = (struct rt6_info *) - dst_alloc(&ip6_dst_blackhole_ops, 1); + struct rt6_info *rt = 
dst_alloc(&ip6_dst_blackhole_ops, 1); + struct rt6_info *ort = (struct rt6_info *) dst_orig; struct dst_entry *new = NULL; if (rt) { @@ -905,9 +904,8 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl dst_free(new); } - dst_release(*dstp); - *dstp = new; - return new ? 0 : -ENOMEM; + dst_release(dst_orig); + return new ? new : ERR_PTR(-ENOMEM); } EXPORT_SYMBOL_GPL(ip6_dst_blackhole); -- cgit v1.1 From 2774c131b1d19920b4587db1cfbd6f0750ad1f15 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 1 Mar 2011 14:59:04 -0800 Subject: xfrm: Handle blackhole route creation via afinfo. That way we don't have to potentially do this in every xfrm_lookup() caller. Signed-off-by: David S. Miller --- net/ipv4/route.c | 20 +++++++------------- net/ipv4/xfrm4_policy.c | 1 + net/ipv6/ip6_output.c | 32 ++++++++++---------------------- net/ipv6/route.c | 3 +-- net/ipv6/xfrm6_policy.c | 1 + net/xfrm/xfrm_policy.c | 46 ++++++++++++++++++++++++++-------------------- 6 files changed, 46 insertions(+), 57 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 23d2050..e24e4cf 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2675,12 +2675,10 @@ static struct dst_ops ipv4_dst_blackhole_ops = { .update_pmtu = ipv4_rt_blackhole_update_pmtu, }; - -static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi *flp) +struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) { - struct rtable *ort = *rp; - struct rtable *rt = (struct rtable *) - dst_alloc(&ipv4_dst_blackhole_ops, 1); + struct rtable *rt = dst_alloc(&ipv4_dst_blackhole_ops, 1); + struct rtable *ort = (struct rtable *) dst_orig; if (rt) { struct dst_entry *new = &rt->dst; @@ -2714,9 +2712,9 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi dst_free(new); } - dst_release(&(*rp)->dst); - *rp = rt; - return rt ? 0 : -ENOMEM; + dst_release(dst_orig); + + return rt ? 
&rt->dst : ERR_PTR(-ENOMEM); } int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, @@ -2732,11 +2730,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, flp->fl4_src = (*rp)->rt_src; if (!flp->fl4_dst) flp->fl4_dst = (*rp)->rt_dst; - err = __xfrm_lookup(net, (struct dst_entry **)rp, flp, sk, 0); - if (err == -EREMOTE) - err = ipv4_dst_blackhole(net, rp, flp); - - return err; + return xfrm_lookup(net, (struct dst_entry **)rp, flp, sk, 0); } return 0; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 63aa88e..5f0f058 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -234,6 +234,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { .get_tos = xfrm4_get_tos, .init_path = xfrm4_init_path, .fill_dst = xfrm4_fill_dst, + .blackhole_route = ipv4_blackhole_route, }; #ifdef CONFIG_SYSCTL diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ac16f3b..35a4ad9 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1025,18 +1025,12 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi *fl, return ERR_PTR(err); if (final_dst) ipv6_addr_copy(&fl->fl6_dst, final_dst); - if (can_sleep) { + if (can_sleep) fl->flags |= FLOWI_FLAG_CAN_SLEEP; - err = __xfrm_lookup(sock_net(sk), &dst, fl, sk, 0); - if (err == -EREMOTE) - return ip6_dst_blackhole(sock_net(sk), dst); - if (err) - return ERR_PTR(err); - } else { - err = xfrm_lookup(sock_net(sk), &dst, fl, sk, 0); - if (err) - return ERR_PTR(err); - } + + err = xfrm_lookup(sock_net(sk), &dst, fl, sk, 0); + if (err) + return ERR_PTR(err); return dst; } EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); @@ -1070,18 +1064,12 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi *fl, return ERR_PTR(err); if (final_dst) ipv6_addr_copy(&fl->fl6_dst, final_dst); - if (can_sleep) { + if (can_sleep) fl->flags |= FLOWI_FLAG_CAN_SLEEP; - err = __xfrm_lookup(sock_net(sk), &dst, fl, sk, 0); - if 
(err == -EREMOTE) - return ip6_dst_blackhole(sock_net(sk), dst); - if (err) - return ERR_PTR(err); - } else { - err = xfrm_lookup(sock_net(sk), &dst, fl, sk, 0); - if (err) - return ERR_PTR(err); - } + + err = xfrm_lookup(sock_net(sk), &dst, fl, sk, 0); + if (err) + return ERR_PTR(err); return dst; } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index cf6fdea..053a92e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -870,7 +870,7 @@ struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, EXPORT_SYMBOL(ip6_route_output); -struct dst_entry *ip6_dst_blackhole(struct net *net, struct dst_entry *dst_orig) +struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) { struct rt6_info *rt = dst_alloc(&ip6_dst_blackhole_ops, 1); struct rt6_info *ort = (struct rt6_info *) dst_orig; @@ -907,7 +907,6 @@ struct dst_entry *ip6_dst_blackhole(struct net *net, struct dst_entry *dst_orig) dst_release(dst_orig); return new ? 
new : ERR_PTR(-ENOMEM); } -EXPORT_SYMBOL_GPL(ip6_dst_blackhole); /* * Destination cache support functions diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index c128ca1..48ce496 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -274,6 +274,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .get_tos = xfrm6_get_tos, .init_path = xfrm6_init_path, .fill_dst = xfrm6_fill_dst, + .blackhole_route = ip6_blackhole_route, }; static int __init xfrm6_policy_init(void) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f4c7467..0248afa 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1735,14 +1735,31 @@ error: return ERR_PTR(err); } +static struct dst_entry *make_blackhole(struct net *net, u16 family, + struct dst_entry *dst_orig) +{ + struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + struct dst_entry *ret; + + if (!afinfo) { + dst_release(dst_orig); + ret = ERR_PTR(-EINVAL); + } else { + ret = afinfo->blackhole_route(net, dst_orig); + } + xfrm_policy_put_afinfo(afinfo); + + return ret; +} + /* Main function: finds/creates a bundle for given flow. * * At the moment we eat a raw IP route. Mostly to speed up lookups * on interfaces with disabled IPsec. 
*/ -int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, - const struct flowi *fl, - struct sock *sk, int flags) +int xfrm_lookup(struct net *net, struct dst_entry **dst_p, + const struct flowi *fl, + struct sock *sk, int flags) { struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; struct flow_cache_object *flo; @@ -1829,7 +1846,12 @@ restart: dst_release(dst); xfrm_pols_put(pols, drop_pols); XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); - return -EREMOTE; + + dst = make_blackhole(net, family, dst_orig); + if (IS_ERR(dst)) + return PTR_ERR(dst); + *dst_p = dst; + return 0; } if (fl->flags & FLOWI_FLAG_CAN_SLEEP) { DECLARE_WAITQUEUE(wait, current); @@ -1895,22 +1917,6 @@ dropdst: xfrm_pols_put(pols, drop_pols); return err; } -EXPORT_SYMBOL(__xfrm_lookup); - -int xfrm_lookup(struct net *net, struct dst_entry **dst_p, - const struct flowi *fl, - struct sock *sk, int flags) -{ - int err = __xfrm_lookup(net, dst_p, fl, sk, flags); - - if (err == -EREMOTE) { - dst_release(*dst_p); - *dst_p = NULL; - err = -EAGAIN; - } - - return err; -} EXPORT_SYMBOL(xfrm_lookup); static inline int -- cgit v1.1 From f6d460cf0ed16d35aec48f823685e7a0e0283d84 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 1 Mar 2011 15:49:55 -0800 Subject: ipv4: Make icmp route lookup code a bit clearer. The route lookup code in icmp_send() is slightly tricky as a result of having to handle all of the requirements of RFC 4301 host relookups. Pull the route resolution into a separate function, so that the error handling and route reference counting is hopefully easier to see and contained wholly within this new routine. Signed-off-by: David S. 
Miller --- net/ipv4/icmp.c | 175 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 96 insertions(+), 79 deletions(-) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index ad2bcf1..2a86c89 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -369,6 +369,98 @@ out_unlock: icmp_xmit_unlock(sk); } +static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in, + struct iphdr *iph, + __be32 saddr, u8 tos, + int type, int code, + struct icmp_bxm *param) +{ + struct flowi fl = { + .fl4_dst = (param->replyopts.srr ? + param->replyopts.faddr : iph->saddr), + .fl4_src = saddr, + .fl4_tos = RT_TOS(tos), + .proto = IPPROTO_ICMP, + .fl_icmp_type = type, + .fl_icmp_code = code, + }; + struct rtable *rt, *rt2; + int err; + + security_skb_classify_flow(skb_in, &fl); + err = __ip_route_output_key(net, &rt, &fl); + if (err) + return ERR_PTR(err); + + /* No need to clone since we're just using its address. */ + rt2 = rt; + + if (!fl.fl4_src) + fl.fl4_src = rt->rt_src; + + err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0); + switch (err) { + case 0: + if (rt != rt2) + return rt; + break; + case -EPERM: + rt = NULL; + break; + default: + return ERR_PTR(err); + } + + err = xfrm_decode_session_reverse(skb_in, &fl, AF_INET); + if (err) + goto relookup_failed; + + if (inet_addr_type(net, fl.fl4_src) == RTN_LOCAL) { + err = __ip_route_output_key(net, &rt2, &fl); + } else { + struct flowi fl2 = {}; + unsigned long orefdst; + + fl2.fl4_dst = fl.fl4_src; + err = ip_route_output_key(net, &rt2, &fl2); + if (err) + goto relookup_failed; + /* Ugh! 
*/ + orefdst = skb_in->_skb_refdst; /* save old refdst */ + err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src, + RT_TOS(tos), rt2->dst.dev); + + dst_release(&rt2->dst); + rt2 = skb_rtable(skb_in); + skb_in->_skb_refdst = orefdst; /* restore old refdst */ + } + + if (err) + goto relookup_failed; + + err = xfrm_lookup(net, (struct dst_entry **)&rt2, &fl, NULL, + XFRM_LOOKUP_ICMP); + switch (err) { + case 0: + dst_release(&rt->dst); + rt = rt2; + break; + case -EPERM: + return ERR_PTR(err); + default: + if (!rt) + return ERR_PTR(err); + break; + } + + + return rt; + +relookup_failed: + if (rt) + return rt; + return ERR_PTR(err); +} /* * Send an ICMP message in response to a situation @@ -506,86 +598,11 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) ipc.opt = &icmp_param.replyopts; ipc.tx_flags = 0; - { - struct flowi fl = { - .fl4_dst = icmp_param.replyopts.srr ? - icmp_param.replyopts.faddr : iph->saddr, - .fl4_src = saddr, - .fl4_tos = RT_TOS(tos), - .proto = IPPROTO_ICMP, - .fl_icmp_type = type, - .fl_icmp_code = code, - }; - int err; - struct rtable *rt2; - - security_skb_classify_flow(skb_in, &fl); - if (__ip_route_output_key(net, &rt, &fl)) - goto out_unlock; - - /* No need to clone since we're just using its address. */ - rt2 = rt; - - if (!fl.nl_u.ip4_u.saddr) - fl.nl_u.ip4_u.saddr = rt->rt_src; - - err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0); - switch (err) { - case 0: - if (rt != rt2) - goto route_done; - break; - case -EPERM: - rt = NULL; - break; - default: - goto out_unlock; - } - - if (xfrm_decode_session_reverse(skb_in, &fl, AF_INET)) - goto relookup_failed; - - if (inet_addr_type(net, fl.fl4_src) == RTN_LOCAL) - err = __ip_route_output_key(net, &rt2, &fl); - else { - struct flowi fl2 = {}; - unsigned long orefdst; - - fl2.fl4_dst = fl.fl4_src; - if (ip_route_output_key(net, &rt2, &fl2)) - goto relookup_failed; - - /* Ugh! 
*/ - orefdst = skb_in->_skb_refdst; /* save old refdst */ - err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src, - RT_TOS(tos), rt2->dst.dev); - - dst_release(&rt2->dst); - rt2 = skb_rtable(skb_in); - skb_in->_skb_refdst = orefdst; /* restore old refdst */ - } - - if (err) - goto relookup_failed; - - err = xfrm_lookup(net, (struct dst_entry **)&rt2, &fl, NULL, - XFRM_LOOKUP_ICMP); - switch (err) { - case 0: - dst_release(&rt->dst); - rt = rt2; - break; - case -EPERM: - goto ende; - default: -relookup_failed: - if (!rt) - goto out_unlock; - break; - } - } + rt = icmp_route_lookup(net, skb_in, iph, saddr, tos, + type, code, &icmp_param); + if (IS_ERR(rt)) + goto out_unlock; -route_done: if (!icmpv4_xrlim_allow(net, rt, type, code)) goto ende; -- cgit v1.1 From 8020c16a6c9fc8d6a5217be8d005f2fc558f6ab5 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Mon, 28 Feb 2011 14:09:50 +0100 Subject: Bluetooth: Fix possible NULL pointer dereference in cmd_complete It is now possible to create command complete event without specific reply data by passing NULL as reply with len 0. Check pointer before calling memcpy to avoid undefined behaviour. Signed-off-by: Szymon Janc Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/mgmt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 46c3edc..34f58f4 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -92,7 +92,9 @@ static int cmd_complete(struct sock *sk, u16 index, u16 cmd, void *rp, ev = (void *) skb_put(skb, sizeof(*ev) + rp_len); put_unaligned_le16(cmd, &ev->opcode); - memcpy(ev->data, rp, rp_len); + + if (rp) + memcpy(ev->data, rp, rp_len); if (sock_queue_rcv_skb(sk, skb) < 0) kfree_skb(skb); -- cgit v1.1 From 34eb525c1fda689507118a1f2c77fef51832ec8a Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Mon, 28 Feb 2011 14:10:08 +0100 Subject: Bluetooth: Log all parameters in cmd_status for easier debugging Signed-off-by: Szymon Janc Signed-off-by: Gustavo F. Padovan --- net/bluetooth/mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 34f58f4..e6efaae 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -49,7 +49,7 @@ static int cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status) struct mgmt_hdr *hdr; struct mgmt_ev_cmd_status *ev; - BT_DBG("sock %p", sk); + BT_DBG("sock %p, index %u, cmd %u, status %u", sk, index, cmd, status); skb = alloc_skb(sizeof(*hdr) + sizeof(*ev), GFP_ATOMIC); if (!skb) -- cgit v1.1 From 8ce6284ea350a5249d09c958bdd088ec0eb4f57f Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Tue, 1 Mar 2011 16:55:32 +0100 Subject: Bluetooth: Remove unused code from get_connections Command pointer was a leftover after moving controller index to mgmt_hdr. Signed-off-by: Szymon Janc Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/mgmt.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index e6efaae..6dd0152 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -900,10 +900,8 @@ failed: return err; } -static int get_connections(struct sock *sk, u16 index, unsigned char *data, - u16 len) +static int get_connections(struct sock *sk, u16 index) { - struct mgmt_cp_get_connections *cp; struct mgmt_rp_get_connections *rp; struct hci_dev *hdev; struct list_head *p; @@ -913,8 +911,6 @@ static int get_connections(struct sock *sk, u16 index, unsigned char *data, BT_DBG(""); - cp = (void *) data; - hdev = hci_dev_get(index); if (!hdev) return cmd_status(sk, index, MGMT_OP_GET_CONNECTIONS, ENODEV); @@ -1333,7 +1329,7 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) err = disconnect(sk, index, buf + sizeof(*hdr), len); break; case MGMT_OP_GET_CONNECTIONS: - err = get_connections(sk, index, buf + sizeof(*hdr), len); + err = get_connections(sk, index); break; case MGMT_OP_PIN_CODE_REPLY: err = pin_code_reply(sk, index, buf + sizeof(*hdr), len); -- cgit v1.1 From 3cf2a4f6ca4e088ba79d05d6e7f4635c535e6ae4 Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Tue, 1 Mar 2011 16:55:33 +0100 Subject: Bluetooth: Use variable name instead of type in sizeof() As written in the CodingStyle doc. Signed-off-by: Szymon Janc Signed-off-by: Gustavo F. 
Padovan --- net/bluetooth/mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 6dd0152..f69dbcb 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1035,7 +1035,7 @@ static int pin_code_neg_reply(struct sock *sk, u16 index, unsigned char *data, goto failed; } - err = hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY, sizeof(bdaddr_t), + err = hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY, sizeof(cp->bdaddr), &cp->bdaddr); if (err < 0) mgmt_pending_remove(cmd); -- cgit v1.1 From b8534e0f2b09e47790c261af0aee86fc88c6eb3c Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Tue, 1 Mar 2011 16:55:34 +0100 Subject: Bluetooth: Fix some small code style issues in mgmt.c Signed-off-by: Szymon Janc Signed-off-by: Gustavo F. Padovan --- net/bluetooth/mgmt.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f69dbcb..0054c74 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -716,8 +716,7 @@ static int set_service_cache(struct sock *sk, u16 index, unsigned char *data, cp = (void *) data; if (len != sizeof(*cp)) - return cmd_status(sk, index, MGMT_OP_SET_SERVICE_CACHE, - EINVAL); + return cmd_status(sk, index, MGMT_OP_SET_SERVICE_CACHE, EINVAL); hdev = hci_dev_get(index); if (!hdev) @@ -1058,8 +1057,7 @@ static int set_io_capability(struct sock *sk, u16 index, unsigned char *data, cp = (void *) data; if (len != sizeof(*cp)) - return cmd_status(sk, index, MGMT_OP_SET_IO_CAPABILITY, - EINVAL); + return cmd_status(sk, index, MGMT_OP_SET_IO_CAPABILITY, EINVAL); hdev = hci_dev_get(index); if (!hdev) @@ -1070,7 +1068,7 @@ static int set_io_capability(struct sock *sk, u16 index, unsigned char *data, hdev->io_capability = cp->io_capability; BT_DBG("%s IO capability set to 0x%02x", hdev->name, - hdev->io_capability); + hdev->io_capability); hci_dev_unlock_bh(hdev); hci_dev_put(hdev); @@ 
-1424,8 +1422,7 @@ int mgmt_discoverable(u16 index, u8 discoverable) struct cmd_lookup match = { discoverable, NULL }; int ret; - mgmt_pending_foreach(MGMT_OP_SET_DISCOVERABLE, index, - mode_rsp, &match); + mgmt_pending_foreach(MGMT_OP_SET_DISCOVERABLE, index, mode_rsp, &match); ev.val = discoverable; @@ -1631,8 +1628,7 @@ int mgmt_user_confirm_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) MGMT_OP_USER_CONFIRM_REPLY); } -int mgmt_user_confirm_neg_reply_complete(u16 index, bdaddr_t *bdaddr, - u8 status) +int mgmt_user_confirm_neg_reply_complete(u16 index, bdaddr_t *bdaddr, u8 status) { return confirm_reply_complete(index, bdaddr, status, MGMT_OP_USER_CONFIRM_NEG_REPLY); -- cgit v1.1 From b42835dbe83d725198c7ab0bbe726d6dfd92a634 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 1 Mar 2011 22:06:22 -0800 Subject: ipv6: Make icmp route lookup code a bit clearer. The route lookup code in icmpv6_send() is slightly tricky as a result of having to handle all of the requirements of RFC 4301 host relookups. Pull the route resolution into a separate function, so that the error handling and route reference counting is hopefully easier to see and contained wholly within this new routine. Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 117 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 66 insertions(+), 51 deletions(-) (limited to 'net') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index a31d91b..e332bae 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -300,6 +300,70 @@ static void mip6_addr_swap(struct sk_buff *skb) static inline void mip6_addr_swap(struct sk_buff *skb) {} #endif +static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, + struct sock *sk, struct flowi *fl) +{ + struct dst_entry *dst, *dst2; + struct flowi fl2; + int err; + + err = ip6_dst_lookup(sk, &dst, fl); + if (err) + return ERR_PTR(err); + + /* + * We won't send icmp if the destination is known + * anycast. 
+ */ + if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) { + LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n"); + dst_release(dst); + return ERR_PTR(-EINVAL); + } + + /* No need to clone since we're just using its address. */ + dst2 = dst; + + err = xfrm_lookup(net, &dst, fl, sk, 0); + switch (err) { + case 0: + if (dst != dst2) + return dst; + break; + case -EPERM: + dst = NULL; + break; + default: + return ERR_PTR(err); + } + + err = xfrm_decode_session_reverse(skb, &fl2, AF_INET6); + if (err) + goto relookup_failed; + + err = ip6_dst_lookup(sk, &dst2, &fl2); + if (err) + goto relookup_failed; + + err = xfrm_lookup(net, &dst2, &fl2, sk, XFRM_LOOKUP_ICMP); + switch (err) { + case 0: + dst_release(dst); + dst = dst2; + break; + case -EPERM: + dst_release(dst); + return ERR_PTR(err); + default: + goto relookup_failed; + } + +relookup_failed: + if (dst) + return dst; + return ERR_PTR(err); +} + /* * Send an ICMP message in response to a packet in error */ @@ -312,10 +376,8 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) struct ipv6_pinfo *np; struct in6_addr *saddr = NULL; struct dst_entry *dst; - struct dst_entry *dst2; struct icmp6hdr tmp_hdr; struct flowi fl; - struct flowi fl2; struct icmpv6_msg msg; int iif = 0; int addr_type = 0; @@ -408,57 +470,10 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) fl.oif = np->mcast_oif; - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) - goto out; - - /* - * We won't send icmp if the destination is known - * anycast. - */ - if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) { - LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n"); - goto out_dst_release; - } - - /* No need to clone since we're just using its address. 
*/ - dst2 = dst; - - err = xfrm_lookup(net, &dst, &fl, sk, 0); - switch (err) { - case 0: - if (dst != dst2) - goto route_done; - break; - case -EPERM: - dst = NULL; - break; - default: + dst = icmpv6_route_lookup(net, skb, sk, &fl); + if (IS_ERR(dst)) goto out; - } - - if (xfrm_decode_session_reverse(skb, &fl2, AF_INET6)) - goto relookup_failed; - - if (ip6_dst_lookup(sk, &dst2, &fl2)) - goto relookup_failed; - - err = xfrm_lookup(net, &dst2, &fl2, sk, XFRM_LOOKUP_ICMP); - switch (err) { - case 0: - dst_release(dst); - dst = dst2; - break; - case -EPERM: - goto out_dst_release; - default: -relookup_failed: - if (!dst) - goto out; - break; - } -route_done: if (ipv6_addr_is_multicast(&fl.fl6_dst)) hlimit = np->mcast_hops; else -- cgit v1.1 From 7f6daa635c28ed686835a4080269e3fdc5a01012 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 1 Mar 2011 22:51:52 -0800 Subject: pfkey: fix warning If CONFIG_NET_KEY_MIGRATE is not defined the arguments of pfkey_migrate stub do not match causing warning. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/key/af_key.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 7fb5457..7db86ff 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2560,7 +2560,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, } #else static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, - struct sadb_msg *hdr, void **ext_hdrs) + const struct sadb_msg *hdr, void * const *ext_hdrs) { return -ENOPROTOOPT; } -- cgit v1.1 From 07df5294a753dfac2cc9f75e6159fc25fdc22149 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 1 Mar 2011 23:00:58 -0800 Subject: inet: Replace left-over references to inet->cork The patch to replace inet->cork with cork left out two spots in __ip_append_data that can result in bogus packet construction. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv4/ip_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 68dbe2d..33316b3 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -789,7 +789,7 @@ static int __ip_append_data(struct sock *sk, struct sk_buff_head *queue, struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; - struct ip_options *opt = inet->cork.opt; + struct ip_options *opt = cork->opt; int hh_len; int exthdrlen; int mtu; @@ -803,7 +803,7 @@ static int __ip_append_data(struct sock *sk, struct sk_buff_head *queue, exthdrlen = transhdrlen ? rt->dst.header_len : 0; length += exthdrlen; transhdrlen += exthdrlen; - mtu = inet->cork.fragsize; + mtu = cork->fragsize; hh_len = LL_RESERVED_SPACE(rt->dst.dev); -- cgit v1.1 From 452edd598f60522c11f7f88fdbab27eb36509d1a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 2 Mar 2011 13:27:41 -0800 Subject: xfrm: Return dst directly from xfrm_lookup() Instead of on the stack. Signed-off-by: David S. 
Miller --- net/decnet/dn_route.c | 12 ++++++++++-- net/ipv4/icmp.c | 36 ++++++++++++++---------------------- net/ipv4/netfilter.c | 6 ++++-- net/ipv4/route.c | 7 ++++++- net/ipv6/icmp.c | 37 ++++++++++++++++++------------------- net/ipv6/ip6_output.c | 10 ++-------- net/ipv6/ip6_tunnel.c | 8 +++++++- net/ipv6/mcast.c | 13 ++++++++++--- net/ipv6/ndisc.c | 8 ++++---- net/ipv6/netfilter.c | 3 ++- net/ipv6/netfilter/ip6t_REJECT.c | 3 ++- net/netfilter/ipvs/ip_vs_xmit.c | 9 +++++++-- net/xfrm/xfrm_policy.c | 34 +++++++++++++++++----------------- 13 files changed, 103 insertions(+), 83 deletions(-) (limited to 'net') diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 0877147..484fdbf 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1222,7 +1222,11 @@ static int dn_route_output_key(struct dst_entry **pprt, struct flowi *flp, int f err = __dn_route_output_key(pprt, flp, flags); if (err == 0 && flp->proto) { - err = xfrm_lookup(&init_net, pprt, flp, NULL, 0); + *pprt = xfrm_lookup(&init_net, *pprt, flp, NULL, 0); + if (IS_ERR(*pprt)) { + err = PTR_ERR(*pprt); + *pprt = NULL; + } } return err; } @@ -1235,7 +1239,11 @@ int dn_route_output_sock(struct dst_entry **pprt, struct flowi *fl, struct sock if (err == 0 && fl->proto) { if (!(flags & MSG_DONTWAIT)) fl->flags |= FLOWI_FLAG_CAN_SLEEP; - err = xfrm_lookup(&init_net, pprt, fl, sk, 0); + *pprt = xfrm_lookup(&init_net, *pprt, fl, sk, 0); + if (IS_ERR(*pprt)) { + err = PTR_ERR(*pprt); + *pprt = NULL; + } } return err; } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 2a86c89..c23bd8c 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -398,18 +398,14 @@ static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in, if (!fl.fl4_src) fl.fl4_src = rt->rt_src; - err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0); - switch (err) { - case 0: + rt = (struct rtable *) xfrm_lookup(net, &rt->dst, &fl, NULL, 0); + if (!IS_ERR(rt)) { if (rt != rt2) return rt; - break; - 
case -EPERM: + } else if (PTR_ERR(rt) == -EPERM) { rt = NULL; - break; - default: - return ERR_PTR(err); - } + } else + return rt; err = xfrm_decode_session_reverse(skb_in, &fl, AF_INET); if (err) @@ -438,22 +434,18 @@ static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in, if (err) goto relookup_failed; - err = xfrm_lookup(net, (struct dst_entry **)&rt2, &fl, NULL, - XFRM_LOOKUP_ICMP); - switch (err) { - case 0: + rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst, &fl, NULL, XFRM_LOOKUP_ICMP); + if (!IS_ERR(rt2)) { dst_release(&rt->dst); rt = rt2; - break; - case -EPERM: - return ERR_PTR(err); - default: - if (!rt) - return ERR_PTR(err); - break; + } else if (PTR_ERR(rt2) == -EPERM) { + if (rt) + dst_release(&rt->dst); + return rt2; + } else { + err = PTR_ERR(rt2); + goto relookup_failed; } - - return rt; relookup_failed: diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 994a1f2..9770bb4 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -69,7 +69,8 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) xfrm_decode_session(skb, &fl, AF_INET) == 0) { struct dst_entry *dst = skb_dst(skb); skb_dst_set(skb, NULL); - if (xfrm_lookup(net, &dst, &fl, skb->sk, 0)) + dst = xfrm_lookup(net, dst, &fl, skb->sk, 0); + if (IS_ERR(dst)) return -1; skb_dst_set(skb, dst); } @@ -102,7 +103,8 @@ int ip_xfrm_me_harder(struct sk_buff *skb) dst = ((struct xfrm_dst *)dst)->route; dst_hold(dst); - if (xfrm_lookup(dev_net(dst->dev), &dst, &fl, skb->sk, 0) < 0) + dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0); + if (IS_ERR(dst)) return -1; skb_dst_drop(skb); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e24e4cf..63d3700 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2730,7 +2730,12 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, flp->fl4_src = (*rp)->rt_src; if (!flp->fl4_dst) flp->fl4_dst = (*rp)->rt_dst; - return xfrm_lookup(net, (struct dst_entry **)rp, flp, 
sk, 0); + *rp = (struct rtable *) xfrm_lookup(net, &(*rp)->dst, flp, sk, 0); + if (IS_ERR(*rp)) { + err = PTR_ERR(*rp); + *rp = NULL; + return err; + } } return 0; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index e332bae..5566595 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -324,17 +324,15 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *sk /* No need to clone since we're just using its address. */ dst2 = dst; - err = xfrm_lookup(net, &dst, fl, sk, 0); - switch (err) { - case 0: + dst = xfrm_lookup(net, dst, fl, sk, 0); + if (!IS_ERR(dst)) { if (dst != dst2) return dst; - break; - case -EPERM: - dst = NULL; - break; - default: - return ERR_PTR(err); + } else { + if (PTR_ERR(dst) == -EPERM) + dst = NULL; + else + return dst; } err = xfrm_decode_session_reverse(skb, &fl2, AF_INET6); @@ -345,17 +343,17 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *sk if (err) goto relookup_failed; - err = xfrm_lookup(net, &dst2, &fl2, sk, XFRM_LOOKUP_ICMP); - switch (err) { - case 0: + dst2 = xfrm_lookup(net, dst2, &fl2, sk, XFRM_LOOKUP_ICMP); + if (!IS_ERR(dst2)) { dst_release(dst); dst = dst2; - break; - case -EPERM: - dst_release(dst); - return ERR_PTR(err); - default: - goto relookup_failed; + } else { + err = PTR_ERR(dst2); + if (err == -EPERM) { + dst_release(dst); + return dst2; + } else + goto relookup_failed; } relookup_failed: @@ -560,7 +558,8 @@ static void icmpv6_echo_reply(struct sk_buff *skb) err = ip6_dst_lookup(sk, &dst, &fl); if (err) goto out; - if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0) + dst = xfrm_lookup(net, dst, &fl, sk, 0); + if (IS_ERR(dst)) goto out; if (ipv6_addr_is_multicast(&fl.fl6_dst)) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 35a4ad9..adaffaf 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1028,10 +1028,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi *fl, if (can_sleep) fl->flags |= 
FLOWI_FLAG_CAN_SLEEP; - err = xfrm_lookup(sock_net(sk), &dst, fl, sk, 0); - if (err) - return ERR_PTR(err); - return dst; + return xfrm_lookup(sock_net(sk), dst, fl, sk, 0); } EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); @@ -1067,10 +1064,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi *fl, if (can_sleep) fl->flags |= FLOWI_FLAG_CAN_SLEEP; - err = xfrm_lookup(sock_net(sk), &dst, fl, sk, 0); - if (err) - return ERR_PTR(err); - return dst; + return xfrm_lookup(sock_net(sk), dst, fl, sk, 0); } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 4f4483e..da43038 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -903,8 +903,14 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, else { dst = ip6_route_output(net, NULL, fl); - if (dst->error || xfrm_lookup(net, &dst, fl, NULL, 0) < 0) + if (dst->error) goto tx_err_link_failure; + dst = xfrm_lookup(net, dst, fl, NULL, 0); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + dst = NULL; + goto tx_err_link_failure; + } } tdev = dst->dev; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 49f986d..7b27d08 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1429,7 +1429,12 @@ static void mld_sendpack(struct sk_buff *skb) &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - err = xfrm_lookup(net, &dst, &fl, NULL, 0); + dst = xfrm_lookup(net, dst, &fl, NULL, 0); + err = 0; + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + dst = NULL; + } skb_dst_set(skb, dst); if (err) goto err_out; @@ -1796,9 +1801,11 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - err = xfrm_lookup(net, &dst, &fl, NULL, 0); - if (err) + dst = xfrm_lookup(net, dst, &fl, NULL, 0); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); goto err_out; + } skb_dst_set(skb, dst); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, diff --git a/net/ipv6/ndisc.c 
b/net/ipv6/ndisc.c index 7254ce3..9360d3b 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -529,8 +529,8 @@ void ndisc_send_skb(struct sk_buff *skb, return; } - err = xfrm_lookup(net, &dst, &fl, NULL, 0); - if (err < 0) { + dst = xfrm_lookup(net, dst, &fl, NULL, 0); + if (IS_ERR(dst)) { kfree_skb(skb); return; } @@ -1542,8 +1542,8 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, if (dst == NULL) return; - err = xfrm_lookup(net, &dst, &fl, NULL, 0); - if (err) + dst = xfrm_lookup(net, dst, &fl, NULL, 0); + if (IS_ERR(dst)) return; rt = (struct rt6_info *) dst; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 35915e8..8d74116 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -39,7 +39,8 @@ int ip6_route_me_harder(struct sk_buff *skb) if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && xfrm_decode_session(skb, &fl, AF_INET6) == 0) { skb_dst_set(skb, NULL); - if (xfrm_lookup(net, &dst, &fl, skb->sk, 0)) + dst = xfrm_lookup(net, dst, &fl, skb->sk, 0); + if (IS_ERR(dst)) return -1; skb_dst_set(skb, dst); } diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index bf998fe..91f6a61 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -101,7 +101,8 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) dst_release(dst); return; } - if (xfrm_lookup(net, &dst, &fl, NULL, 0)) + dst = xfrm_lookup(net, dst, &fl, NULL, 0); + if (IS_ERR(dst)) return; hh_len = (dst->dev->hard_header_len + 15)&~15; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index a48239a..6264219 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -218,8 +218,13 @@ __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, &fl.fl6_dst, 0, &fl.fl6_src) < 0) goto out_err; - if (do_xfrm && xfrm_lookup(net, &dst, &fl, NULL, 0) < 0) - goto out_err; + if (do_xfrm) 
{ + dst = xfrm_lookup(net, dst, &fl, NULL, 0); + if (IS_ERR(dst)) { + dst = NULL; + goto out_err; + } + } ipv6_addr_copy(ret_saddr, &fl.fl6_src); return dst; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 0248afa..b1932a6 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1757,14 +1757,14 @@ static struct dst_entry *make_blackhole(struct net *net, u16 family, * At the moment we eat a raw IP route. Mostly to speed up lookups * on interfaces with disabled IPsec. */ -int xfrm_lookup(struct net *net, struct dst_entry **dst_p, - const struct flowi *fl, - struct sock *sk, int flags) +struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, + const struct flowi *fl, + struct sock *sk, int flags) { struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; struct flow_cache_object *flo; struct xfrm_dst *xdst; - struct dst_entry *dst, *dst_orig = *dst_p, *route; + struct dst_entry *dst, *route; u16 family = dst_orig->ops->family; u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); int i, err, num_pols, num_xfrms = 0, drop_pols = 0; @@ -1847,11 +1847,7 @@ restart: xfrm_pols_put(pols, drop_pols); XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); - dst = make_blackhole(net, family, dst_orig); - if (IS_ERR(dst)) - return PTR_ERR(dst); - *dst_p = dst; - return 0; + return make_blackhole(net, family, dst_orig); } if (fl->flags & FLOWI_FLAG_CAN_SLEEP) { DECLARE_WAITQUEUE(wait, current); @@ -1895,27 +1891,28 @@ no_transform: goto error; } else if (num_xfrms > 0) { /* Flow transformed */ - *dst_p = dst; dst_release(dst_orig); } else { /* Flow passes untransformed */ dst_release(dst); + dst = dst_orig; } ok: xfrm_pols_put(pols, drop_pols); - return 0; + return dst; nopol: - if (!(flags & XFRM_LOOKUP_ICMP)) + if (!(flags & XFRM_LOOKUP_ICMP)) { + dst = dst_orig; goto ok; + } err = -ENOENT; error: dst_release(dst); dropdst: dst_release(dst_orig); - *dst_p = NULL; xfrm_pols_put(pols, drop_pols); - return err; + return ERR_PTR(err); } 
EXPORT_SYMBOL(xfrm_lookup); @@ -2175,7 +2172,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) struct net *net = dev_net(skb->dev); struct flowi fl; struct dst_entry *dst; - int res; + int res = 0; if (xfrm_decode_session(skb, &fl, family) < 0) { XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); @@ -2183,9 +2180,12 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) } skb_dst_force(skb); - dst = skb_dst(skb); - res = xfrm_lookup(net, &dst, &fl, NULL, 0) == 0; + dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0); + if (IS_ERR(dst)) { + res = 1; + dst = NULL; + } skb_dst_set(skb, dst); return res; } -- cgit v1.1 From b23dd4fe42b455af5c6e20966b7d6959fa8352ea Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 2 Mar 2011 14:31:35 -0800 Subject: ipv4: Make output route lookup return rtable directly. Instead of on the stack. Signed-off-by: David S. Miller --- net/atm/clip.c | 6 +-- net/bridge/br_netfilter.c | 9 ++-- net/dccp/ipv4.c | 27 +++++------ net/ipv4/af_inet.c | 30 ++++++------ net/ipv4/arp.c | 19 ++++---- net/ipv4/datagram.c | 11 +++-- net/ipv4/icmp.c | 19 +++++--- net/ipv4/igmp.c | 16 ++++--- net/ipv4/inet_connection_sock.c | 3 +- net/ipv4/ip_gre.c | 11 +++-- net/ipv4/ip_output.c | 6 ++- net/ipv4/ipip.c | 7 +-- net/ipv4/ipmr.c | 8 ++-- net/ipv4/netfilter.c | 12 +++-- net/ipv4/raw.c | 8 ++-- net/ipv4/route.c | 100 ++++++++++++++++++++-------------------- net/ipv4/syncookies.c | 3 +- net/ipv4/tcp_ipv4.c | 28 +++++------ net/ipv4/udp.c | 5 +- net/ipv4/xfrm4_policy.c | 12 ++--- net/ipv6/ip6_tunnel.c | 11 +++-- net/ipv6/sit.c | 8 ++-- net/l2tp/l2tp_ip.c | 8 ++-- net/netfilter/ipvs/ip_vs_xmit.c | 9 ++-- net/netfilter/xt_TEE.c | 3 +- net/rxrpc/ar-peer.c | 7 ++- net/sctp/protocol.c | 7 +-- 27 files changed, 214 insertions(+), 179 deletions(-) (limited to 'net') diff --git a/net/atm/clip.c b/net/atm/clip.c index d257da5..810a129 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -520,9 +520,9 @@ static int 
clip_setentry(struct atm_vcc *vcc, __be32 ip) unlink_clip_vcc(clip_vcc); return 0; } - error = ip_route_output_key(&init_net, &rt, &fl); - if (error) - return error; + rt = ip_route_output_key(&init_net, &fl); + if (IS_ERR(rt)) + return PTR_ERR(rt); neigh = __neigh_lookup(&clip_tbl, &ip, rt->dst.dev, 1); ip_rt_put(rt); if (!neigh) diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 4b5b66d..45b57b1 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -428,14 +428,15 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) goto free_skb; - if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { + rt = ip_route_output_key(dev_net(dev), &fl); + if (!IS_ERR(rt)) { /* - Bridged-and-DNAT'ed traffic doesn't * require ip_forwarding. */ - if (((struct dst_entry *)rt)->dev == dev) { - skb_dst_set(skb, (struct dst_entry *)rt); + if (rt->dst.dev == dev) { + skb_dst_set(skb, &rt->dst); goto bridged_dnat; } - dst_release((struct dst_entry *)rt); + ip_rt_put(rt); } free_skb: kfree_skb(skb); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index a8ff955..7882377 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -46,7 +46,6 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) __be16 orig_sport, orig_dport; struct rtable *rt; __be32 daddr, nexthop; - int tmp; int err; dp->dccps_role = DCCP_ROLE_CLIENT; @@ -66,12 +65,12 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) orig_sport = inet->inet_sport; orig_dport = usin->sin_port; - tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr, - RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, - IPPROTO_DCCP, - orig_sport, orig_dport, sk, true); - if (tmp < 0) - return tmp; + rt = ip_route_connect(nexthop, inet->inet_saddr, + RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, + IPPROTO_DCCP, + orig_sport, orig_dport, sk, true); + if (IS_ERR(rt)) + return PTR_ERR(rt); if (rt->rt_flags & (RTCF_MULTICAST | 
RTCF_BROADCAST)) { ip_rt_put(rt); @@ -102,12 +101,13 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (err != 0) goto failure; - err = ip_route_newports(&rt, IPPROTO_DCCP, - orig_sport, orig_dport, - inet->inet_sport, inet->inet_dport, sk); - if (err != 0) + rt = ip_route_newports(rt, IPPROTO_DCCP, + orig_sport, orig_dport, + inet->inet_sport, inet->inet_dport, sk); + if (IS_ERR(rt)) { + rt = NULL; goto failure; - + } /* OK, now commit destination to socket. */ sk_setup_caps(sk, &rt->dst); @@ -475,7 +475,8 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, }; security_skb_classify_flow(skb, &fl); - if (ip_route_output_flow(net, &rt, &fl, sk)) { + rt = ip_route_output_flow(net, &fl, sk); + if (IS_ERR(rt)) { IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 44513bb..35a5020 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1101,23 +1101,20 @@ int sysctl_ip_dynaddr __read_mostly; static int inet_sk_reselect_saddr(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); - int err; - struct rtable *rt; __be32 old_saddr = inet->inet_saddr; - __be32 new_saddr; __be32 daddr = inet->inet_daddr; + struct rtable *rt; + __be32 new_saddr; if (inet->opt && inet->opt->srr) daddr = inet->opt->faddr; /* Query new route. 
*/ - err = ip_route_connect(&rt, daddr, 0, - RT_CONN_FLAGS(sk), - sk->sk_bound_dev_if, - sk->sk_protocol, - inet->inet_sport, inet->inet_dport, sk, false); - if (err) - return err; + rt = ip_route_connect(daddr, 0, RT_CONN_FLAGS(sk), + sk->sk_bound_dev_if, sk->sk_protocol, + inet->inet_sport, inet->inet_dport, sk, false); + if (IS_ERR(rt)) + return PTR_ERR(rt); sk_setup_caps(sk, &rt->dst); @@ -1160,7 +1157,7 @@ int inet_sk_rebuild_header(struct sock *sk) daddr = inet->inet_daddr; if (inet->opt && inet->opt->srr) daddr = inet->opt->faddr; -{ + { struct flowi fl = { .oif = sk->sk_bound_dev_if, .mark = sk->sk_mark, @@ -1174,11 +1171,14 @@ int inet_sk_rebuild_header(struct sock *sk) }; security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk); -} - if (!err) + rt = ip_route_output_flow(sock_net(sk), &fl, sk); + } + if (!IS_ERR(rt)) { + err = 0; sk_setup_caps(sk, &rt->dst); - else { + } else { + err = PTR_ERR(rt); + /* Routing failed... */ sk->sk_route_caps = 0; /* diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 7927589..fa9988d 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -440,7 +440,8 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) /*unsigned long now; */ struct net *net = dev_net(dev); - if (ip_route_output_key(net, &rt, &fl) < 0) + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) return 1; if (rt->dst.dev != dev) { NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER); @@ -1063,10 +1064,10 @@ static int arp_req_set(struct net *net, struct arpreq *r, if (dev == NULL) { struct flowi fl = { .fl4_dst = ip, .fl4_tos = RTO_ONLINK }; - struct rtable *rt; - err = ip_route_output_key(net, &rt, &fl); - if (err != 0) - return err; + struct rtable *rt = ip_route_output_key(net, &fl); + + if (IS_ERR(rt)) + return PTR_ERR(rt); dev = rt->dst.dev; ip_rt_put(rt); if (!dev) @@ -1177,7 +1178,6 @@ static int arp_req_delete_public(struct net *net, struct arpreq *r, static int arp_req_delete(struct net *net, struct arpreq 
*r, struct net_device *dev) { - int err; __be32 ip; if (r->arp_flags & ATF_PUBL) @@ -1187,10 +1187,9 @@ static int arp_req_delete(struct net *net, struct arpreq *r, if (dev == NULL) { struct flowi fl = { .fl4_dst = ip, .fl4_tos = RTO_ONLINK }; - struct rtable *rt; - err = ip_route_output_key(net, &rt, &fl); - if (err != 0) - return err; + struct rtable *rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) + return PTR_ERR(rt); dev = rt->dst.dev; ip_rt_put(rt); if (!dev) diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index eaee1ed..85bd24c 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -46,11 +46,12 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (!saddr) saddr = inet->mc_addr; } - err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr, - RT_CONN_FLAGS(sk), oif, - sk->sk_protocol, - inet->inet_sport, usin->sin_port, sk, true); - if (err) { + rt = ip_route_connect(usin->sin_addr.s_addr, saddr, + RT_CONN_FLAGS(sk), oif, + sk->sk_protocol, + inet->inet_sport, usin->sin_port, sk, true); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); if (err == -ENETUNREACH) IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); return err; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index c23bd8c..994a785 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -358,7 +358,8 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) .fl4_tos = RT_TOS(ip_hdr(skb)->tos), .proto = IPPROTO_ICMP }; security_skb_classify_flow(skb, &fl); - if (ip_route_output_key(net, &rt, &fl)) + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) goto out_unlock; } if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type, @@ -388,9 +389,9 @@ static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in, int err; security_skb_classify_flow(skb_in, &fl); - err = __ip_route_output_key(net, &rt, &fl); - if (err) - return ERR_PTR(err); + rt = __ip_route_output_key(net, &fl); + if (IS_ERR(rt)) + return rt; /* No need 
to clone since we're just using its address. */ rt2 = rt; @@ -412,15 +413,19 @@ static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in, goto relookup_failed; if (inet_addr_type(net, fl.fl4_src) == RTN_LOCAL) { - err = __ip_route_output_key(net, &rt2, &fl); + rt2 = __ip_route_output_key(net, &fl); + if (IS_ERR(rt2)) + err = PTR_ERR(rt2); } else { struct flowi fl2 = {}; unsigned long orefdst; fl2.fl4_dst = fl.fl4_src; - err = ip_route_output_key(net, &rt2, &fl2); - if (err) + rt2 = ip_route_output_key(net, &fl2); + if (IS_ERR(rt2)) { + err = PTR_ERR(rt2); goto relookup_failed; + } /* Ugh! */ orefdst = skb_in->_skb_refdst; /* save old refdst */ err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src, diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index e0e77e2..44ba906 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -325,7 +325,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) struct flowi fl = { .oif = dev->ifindex, .fl4_dst = IGMPV3_ALL_MCR, .proto = IPPROTO_IGMP }; - if (ip_route_output_key(net, &rt, &fl)) { + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) { kfree_skb(skb); return NULL; } @@ -670,7 +671,8 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, struct flowi fl = { .oif = dev->ifindex, .fl4_dst = dst, .proto = IPPROTO_IGMP }; - if (ip_route_output_key(net, &rt, &fl)) + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) return -1; } if (rt->rt_src == 0) { @@ -1440,7 +1442,6 @@ void ip_mc_destroy_dev(struct in_device *in_dev) static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) { struct flowi fl = { .fl4_dst = imr->imr_multiaddr.s_addr }; - struct rtable *rt; struct net_device *dev = NULL; struct in_device *idev = NULL; @@ -1454,9 +1455,12 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) return NULL; } - if (!dev && !ip_route_output_key(net, &rt, &fl)) { - dev = rt->dst.dev; - ip_rt_put(rt); + if (!dev) { + 
struct rtable *rt = ip_route_output_key(net, &fl); + if (!IS_ERR(rt)) { + dev = rt->dst.dev; + ip_rt_put(rt); + } } if (dev) { imr->imr_ifindex = dev->ifindex; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 7f85d4a..e4e301a 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -369,7 +369,8 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct net *net = sock_net(sk); security_req_classify_flow(req, &fl); - if (ip_route_output_flow(net, &rt, &fl, sk)) + rt = ip_route_output_flow(net, &fl, sk); + if (IS_ERR(rt)) goto no_route; if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) goto route_err; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 6613edf..f9af98d 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -778,7 +778,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev .proto = IPPROTO_GRE, .fl_gre_key = tunnel->parms.o_key }; - if (ip_route_output_key(dev_net(dev), &rt, &fl)) { + rt = ip_route_output_key(dev_net(dev), &fl); + if (IS_ERR(rt)) { dev->stats.tx_carrier_errors++; goto tx_error; } @@ -953,9 +954,9 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) .proto = IPPROTO_GRE, .fl_gre_key = tunnel->parms.o_key }; - struct rtable *rt; + struct rtable *rt = ip_route_output_key(dev_net(dev), &fl); - if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { + if (!IS_ERR(rt)) { tdev = rt->dst.dev; ip_rt_put(rt); } @@ -1215,9 +1216,9 @@ static int ipgre_open(struct net_device *dev) .proto = IPPROTO_GRE, .fl_gre_key = t->parms.o_key }; - struct rtable *rt; + struct rtable *rt = ip_route_output_key(dev_net(dev), &fl); - if (ip_route_output_key(dev_net(dev), &rt, &fl)) + if (IS_ERR(rt)) return -EADDRNOTAVAIL; dev = rt->dst.dev; ip_rt_put(rt); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 33316b3..171f483 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -355,7 +355,8 @@ int ip_queue_xmit(struct 
sk_buff *skb) * itself out. */ security_sk_classify_flow(sk, &fl); - if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk)) + rt = ip_route_output_flow(sock_net(sk), &fl, sk); + if (IS_ERR(rt)) goto no_route; } sk_setup_caps(sk, &rt->dst); @@ -1489,7 +1490,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar .proto = sk->sk_protocol, .flags = ip_reply_arg_flowi_flags(arg) }; security_skb_classify_flow(skb, &fl); - if (ip_route_output_key(sock_net(sk), &rt, &fl)) + rt = ip_route_output_key(sock_net(sk), &fl); + if (IS_ERR(rt)) return; } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 988f52f..e1e1757 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -469,7 +469,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) .proto = IPPROTO_IPIP }; - if (ip_route_output_key(dev_net(dev), &rt, &fl)) { + rt = ip_route_output_key(dev_net(dev), &fl); + if (IS_ERR(rt)) { dev->stats.tx_carrier_errors++; goto tx_error_icmp; } @@ -590,9 +591,9 @@ static void ipip_tunnel_bind_dev(struct net_device *dev) .fl4_tos = RT_TOS(iph->tos), .proto = IPPROTO_IPIP }; - struct rtable *rt; + struct rtable *rt = ip_route_output_key(dev_net(dev), &fl); - if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { + if (!IS_ERR(rt)) { tdev = rt->dst.dev; ip_rt_put(rt); } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8b65a12..26ca2f2 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1618,8 +1618,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, .fl4_tos = RT_TOS(iph->tos), .proto = IPPROTO_IPIP }; - - if (ip_route_output_key(net, &rt, &fl)) + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) goto out_free; encap = sizeof(struct iphdr); } else { @@ -1629,8 +1629,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, .fl4_tos = RT_TOS(iph->tos), .proto = IPPROTO_IPIP }; - - if (ip_route_output_key(net, &rt, &fl)) + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) goto out_free; } 
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 9770bb4..67bf709 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -38,7 +38,8 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; fl.mark = skb->mark; fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; - if (ip_route_output_key(net, &rt, &fl) != 0) + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) return -1; /* Drop old route. */ @@ -48,7 +49,8 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) /* non-local src, find valid iif to satisfy * rp-filter when calling ip_route_input. */ fl.fl4_dst = iph->saddr; - if (ip_route_output_key(net, &rt, &fl) != 0) + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) return -1; orefdst = skb->_skb_refdst; @@ -221,7 +223,11 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, static int nf_ip_route(struct dst_entry **dst, struct flowi *fl) { - return ip_route_output_key(&init_net, (struct rtable **)dst, fl); + struct rtable *rt = ip_route_output_key(&init_net, fl); + if (IS_ERR(rt)) + return PTR_ERR(rt); + *dst = &rt->dst; + return 0; } static const struct nf_afinfo nf_ip_afinfo = { diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index d7a2d1e..467d570 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -564,10 +564,12 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk); + rt = ip_route_output_flow(sock_net(sk), &fl, sk); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + goto done; + } } - if (err) - goto done; err = -EACCES; if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST)) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 63d3700..5090e95 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1014,8 +1014,8 @@ static int slow_chain_length(const struct rtable *head) return length >> FRACT_BITS; 
} -static int rt_intern_hash(unsigned hash, struct rtable *rt, - struct rtable **rp, struct sk_buff *skb, int ifindex) +static struct rtable *rt_intern_hash(unsigned hash, struct rtable *rt, + struct sk_buff *skb, int ifindex) { struct rtable *rth, *cand; struct rtable __rcu **rthp, **candp; @@ -1056,7 +1056,7 @@ restart: printk(KERN_WARNING "Neighbour table failure & not caching routes.\n"); ip_rt_put(rt); - return err; + return ERR_PTR(err); } } @@ -1093,11 +1093,9 @@ restart: spin_unlock_bh(rt_hash_lock_addr(hash)); rt_drop(rt); - if (rp) - *rp = rth; - else + if (skb) skb_dst_set(skb, &rth->dst); - return 0; + return rth; } if (!atomic_read(&rth->dst.__refcnt)) { @@ -1154,7 +1152,7 @@ restart: if (err != -ENOBUFS) { rt_drop(rt); - return err; + return ERR_PTR(err); } /* Neighbour tables are full and nothing @@ -1175,7 +1173,7 @@ restart: if (net_ratelimit()) printk(KERN_WARNING "ipv4: Neighbour table overflow.\n"); rt_drop(rt); - return -ENOBUFS; + return ERR_PTR(-ENOBUFS); } } @@ -1201,11 +1199,9 @@ restart: spin_unlock_bh(rt_hash_lock_addr(hash)); skip_hashing: - if (rp) - *rp = rt; - else + if (skb) skb_dst_set(skb, &rt->dst); - return 0; + return rt; } static atomic_t __rt_peer_genid = ATOMIC_INIT(0); @@ -1896,7 +1892,10 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, RT_CACHE_STAT_INC(in_slow_mc); hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); - return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex); + rth = rt_intern_hash(hash, rth, skb, dev->ifindex); + err = 0; + if (IS_ERR(rth)) + err = PTR_ERR(rth); e_nobufs: return -ENOBUFS; @@ -2051,7 +2050,10 @@ static int ip_mkroute_input(struct sk_buff *skb, /* put it into the cache */ hash = rt_hash(daddr, saddr, fl->iif, rt_genid(dev_net(rth->dst.dev))); - return rt_intern_hash(hash, rth, NULL, skb, fl->iif); + rth = rt_intern_hash(hash, rth, skb, fl->iif); + if (IS_ERR(rth)) + return PTR_ERR(rth); + return 0; } /* @@ -2194,7 +2196,10 @@ local_input: } 
rth->rt_type = res.type; hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); - err = rt_intern_hash(hash, rth, NULL, skb, fl.iif); + rth = rt_intern_hash(hash, rth, skb, fl.iif); + err = 0; + if (IS_ERR(rth)) + err = PTR_ERR(rth); goto out; no_route: @@ -2422,8 +2427,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res, * called with rcu_read_lock(); */ -static int ip_route_output_slow(struct net *net, struct rtable **rp, - const struct flowi *oldflp) +static struct rtable *ip_route_output_slow(struct net *net, + const struct flowi *oldflp) { u32 tos = RT_FL_TOS(oldflp); struct flowi fl = { .fl4_dst = oldflp->fl4_dst, @@ -2438,8 +2443,6 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, unsigned int flags = 0; struct net_device *dev_out = NULL; struct rtable *rth; - int err; - res.fi = NULL; #ifdef CONFIG_IP_MULTIPLE_TABLES @@ -2448,7 +2451,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, rcu_read_lock(); if (oldflp->fl4_src) { - err = -EINVAL; + rth = ERR_PTR(-EINVAL); if (ipv4_is_multicast(oldflp->fl4_src) || ipv4_is_lbcast(oldflp->fl4_src) || ipv4_is_zeronet(oldflp->fl4_src)) @@ -2499,13 +2502,13 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, if (oldflp->oif) { dev_out = dev_get_by_index_rcu(net, oldflp->oif); - err = -ENODEV; + rth = ERR_PTR(-ENODEV); if (dev_out == NULL) goto out; /* RACE: Check return value of inet_select_addr instead. 
*/ if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) { - err = -ENETUNREACH; + rth = ERR_PTR(-ENETUNREACH); goto out; } if (ipv4_is_local_multicast(oldflp->fl4_dst) || @@ -2563,7 +2566,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, res.type = RTN_UNICAST; goto make_route; } - err = -ENETUNREACH; + rth = ERR_PTR(-ENETUNREACH); goto out; } @@ -2598,23 +2601,20 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, make_route: rth = __mkroute_output(&res, &fl, oldflp, dev_out, flags); - if (IS_ERR(rth)) - err = PTR_ERR(rth); - else { + if (!IS_ERR(rth)) { unsigned int hash; hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, rt_genid(dev_net(dev_out))); - err = rt_intern_hash(hash, rth, rp, NULL, oldflp->oif); + rth = rt_intern_hash(hash, rth, NULL, oldflp->oif); } out: rcu_read_unlock(); - return err; + return rth; } -int __ip_route_output_key(struct net *net, struct rtable **rp, - const struct flowi *flp) +struct rtable *__ip_route_output_key(struct net *net, const struct flowi *flp) { struct rtable *rth; unsigned int hash; @@ -2639,15 +2639,14 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, dst_use(&rth->dst, jiffies); RT_CACHE_STAT_INC(out_hit); rcu_read_unlock_bh(); - *rp = rth; - return 0; + return rth; } RT_CACHE_STAT_INC(out_hlist_search); } rcu_read_unlock_bh(); slow_output: - return ip_route_output_slow(net, rp, flp); + return ip_route_output_slow(net, flp); } EXPORT_SYMBOL_GPL(__ip_route_output_key); @@ -2717,34 +2716,29 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or return rt ? 
&rt->dst : ERR_PTR(-ENOMEM); } -int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, - struct sock *sk) +struct rtable *ip_route_output_flow(struct net *net, struct flowi *flp, + struct sock *sk) { - int err; + struct rtable *rt = __ip_route_output_key(net, flp); - if ((err = __ip_route_output_key(net, rp, flp)) != 0) - return err; + if (IS_ERR(rt)) + return rt; if (flp->proto) { if (!flp->fl4_src) - flp->fl4_src = (*rp)->rt_src; + flp->fl4_src = rt->rt_src; if (!flp->fl4_dst) - flp->fl4_dst = (*rp)->rt_dst; - *rp = (struct rtable *) xfrm_lookup(net, &(*rp)->dst, flp, sk, 0); - if (IS_ERR(*rp)) { - err = PTR_ERR(*rp); - *rp = NULL; - return err; - } + flp->fl4_dst = rt->rt_dst; + rt = (struct rtable *) xfrm_lookup(net, &rt->dst, flp, sk, 0); } - return 0; + return rt; } EXPORT_SYMBOL_GPL(ip_route_output_flow); -int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp) +struct rtable *ip_route_output_key(struct net *net, struct flowi *flp) { - return ip_route_output_flow(net, rp, flp, NULL); + return ip_route_output_flow(net, flp, NULL); } EXPORT_SYMBOL(ip_route_output_key); @@ -2915,7 +2909,11 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void .oif = tb[RTA_OIF] ? 
nla_get_u32(tb[RTA_OIF]) : 0, .mark = mark, }; - err = ip_route_output_key(net, &rt, &fl); + rt = ip_route_output_key(net, &fl); + + err = 0; + if (IS_ERR(rt)) + err = PTR_ERR(rt); } if (err) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 4751920..0ad6ddf 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -355,7 +355,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, .fl_ip_sport = th->dest, .fl_ip_dport = th->source }; security_req_classify_flow(req, &fl); - if (ip_route_output_key(sock_net(sk), &rt, &fl)) { + rt = ip_route_output_key(sock_net(sk), &fl); + if (IS_ERR(rt)) { reqsk_free(req); goto out; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 05bc6d9..f7e6c2c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -152,7 +152,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) __be16 orig_sport, orig_dport; struct rtable *rt; __be32 daddr, nexthop; - int tmp; int err; if (addr_len < sizeof(struct sockaddr_in)) @@ -170,14 +169,15 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) orig_sport = inet->inet_sport; orig_dport = usin->sin_port; - tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr, - RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, - IPPROTO_TCP, - orig_sport, orig_dport, sk, true); - if (tmp < 0) { - if (tmp == -ENETUNREACH) + rt = ip_route_connect(nexthop, inet->inet_saddr, + RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, + IPPROTO_TCP, + orig_sport, orig_dport, sk, true); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + if (err == -ENETUNREACH) IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); - return tmp; + return err; } if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { @@ -236,12 +236,14 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (err) goto failure; - err = ip_route_newports(&rt, IPPROTO_TCP, - orig_sport, orig_dport, - inet->inet_sport, inet->inet_dport, sk); - if (err) + rt = 
ip_route_newports(rt, IPPROTO_TCP, + orig_sport, orig_dport, + inet->inet_sport, inet->inet_dport, sk); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + rt = NULL; goto failure; - + } /* OK, now commit destination to socket. */ sk->sk_gso_type = SKB_GSO_TCPV4; sk_setup_caps(sk, &rt->dst); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ed9a5b7..95e0c2c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -922,8 +922,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct net *net = sock_net(sk); security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(net, &rt, &fl, sk); - if (err) { + rt = ip_route_output_flow(net, &fl, sk); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); if (err == -ENETUNREACH) IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); goto out; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 5f0f058..45b8214 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -26,18 +26,16 @@ static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, .fl4_dst = daddr->a4, .fl4_tos = tos, }; - struct dst_entry *dst; struct rtable *rt; - int err; if (saddr) fl.fl4_src = saddr->a4; - err = __ip_route_output_key(net, &rt, &fl); - dst = &rt->dst; - if (err) - dst = ERR_PTR(err); - return dst; + rt = __ip_route_output_key(net, &fl); + if (!IS_ERR(rt)) + return &rt->dst; + + return ERR_CAST(rt); } static int xfrm4_get_saddr(struct net *net, diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index da43038..02730ef 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -581,7 +581,8 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl.fl4_dst = eiph->saddr; fl.fl4_tos = RT_TOS(eiph->tos); fl.proto = IPPROTO_IPIP; - if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) + rt = ip_route_output_key(dev_net(skb->dev), &fl); + if (IS_ERR(rt)) goto out; skb2->dev = rt->dst.dev; @@ -593,12 +594,14 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl.fl4_dst = eiph->daddr; 
fl.fl4_src = eiph->saddr; fl.fl4_tos = eiph->tos; - if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) || + rt = ip_route_output_key(dev_net(skb->dev), &fl); + if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) { - ip_rt_put(rt); + if (!IS_ERR(rt)) + ip_rt_put(rt); goto out; } - skb_dst_set(skb2, (struct dst_entry *)rt); + skb_dst_set(skb2, &rt->dst); } else { ip_rt_put(rt); if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index b1599a3..b8c8adb 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -738,7 +738,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, .fl4_tos = RT_TOS(tos), .oif = tunnel->parms.link, .proto = IPPROTO_IPV6 }; - if (ip_route_output_key(dev_net(dev), &rt, &fl)) { + rt = ip_route_output_key(dev_net(dev), &fl); + if (IS_ERR(rt)) { dev->stats.tx_carrier_errors++; goto tx_error_icmp; } @@ -862,8 +863,9 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) .fl4_tos = RT_TOS(iph->tos), .oif = tunnel->parms.link, .proto = IPPROTO_IPV6 }; - struct rtable *rt; - if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { + struct rtable *rt = ip_route_output_key(dev_net(dev), &fl); + + if (!IS_ERR(rt)) { tdev = rt->dst.dev; ip_rt_put(rt); } diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 5381ceb..2a698ff 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -320,11 +320,12 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len if (ipv4_is_multicast(lsa->l2tp_addr.s_addr)) goto out; - rc = ip_route_connect(&rt, lsa->l2tp_addr.s_addr, saddr, + rt = ip_route_connect(lsa->l2tp_addr.s_addr, saddr, RT_CONN_FLAGS(sk), oif, IPPROTO_L2TP, 0, 0, sk, true); - if (rc) { + if (IS_ERR(rt)) { + rc = PTR_ERR(rt); if (rc == -ENETUNREACH) IP_INC_STATS_BH(&init_net, IPSTATS_MIB_OUTNOROUTES); goto out; @@ -489,7 +490,8 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m * itself out. 
*/ security_sk_classify_flow(sk, &fl); - if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk)) + rt = ip_route_output_flow(sock_net(sk), &fl, sk); + if (IS_ERR(rt)) goto no_route; } sk_setup_caps(sk, &rt->dst); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 6264219..878f6dd 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -103,7 +103,8 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, .fl4_tos = rtos, }; - if (ip_route_output_key(net, &rt, &fl)) { + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) { spin_unlock(&dest->dst_lock); IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &dest->addr.ip); @@ -121,7 +122,8 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, .fl4_tos = rtos, }; - if (ip_route_output_key(net, &rt, &fl)) { + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) { IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); return NULL; @@ -180,7 +182,8 @@ __ip_vs_reroute_locally(struct sk_buff *skb) .mark = skb->mark, }; - if (ip_route_output_key(net, &rt, &fl)) + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) return 0; if (!(rt->rt_flags & RTCF_LOCAL)) { ip_rt_put(rt); diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 5128a6c..624725b 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -73,7 +73,8 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info) fl.fl4_dst = info->gw.ip; fl.fl4_tos = RT_TOS(iph->tos); fl.fl4_scope = RT_SCOPE_UNIVERSE; - if (ip_route_output_key(net, &rt, &fl) != 0) + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) return false; skb_dst_drop(skb); diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c index a53fb25..3620c56 100644 --- a/net/rxrpc/ar-peer.c +++ b/net/rxrpc/ar-peer.c @@ -37,7 +37,6 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) { struct rtable *rt; struct flowi fl; - int ret; peer->if_mtu = 1500; @@ -58,9 +57,9 @@ 
static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) BUG(); } - ret = ip_route_output_key(&init_net, &rt, &fl); - if (ret < 0) { - _leave(" [route err %d]", ret); + rt = ip_route_output_key(&init_net, &fl); + if (IS_ERR(rt)) { + _leave(" [route err %ld]", PTR_ERR(rt)); return; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index e58f947..4e55e6c 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -491,9 +491,9 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ", __func__, &fl.fl4_dst, &fl.fl4_src); - if (!ip_route_output_key(&init_net, &rt, &fl)) { + rt = ip_route_output_key(&init_net, &fl); + if (!IS_ERR(rt)) dst = &rt->dst; - } /* If there is no association or if a source address is passed, no * more validation is required. @@ -535,7 +535,8 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, (AF_INET == laddr->a.sa.sa_family)) { fl.fl4_src = laddr->a.v4.sin_addr.s_addr; fl.fl_ip_sport = laddr->a.v4.sin_port; - if (!ip_route_output_key(&init_net, &rt, &fl)) { + rt = ip_route_output_key(&init_net, &fl); + if (!IS_ERR(rt)) { dst = &rt->dst; goto out_unlock; } -- cgit v1.1 From 5bfa787fb2c29cce0722500f90df29e049ff07fc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 2 Mar 2011 14:56:30 -0800 Subject: ipv4: ip_route_output_key() is better as an inline. This avoids a stack frame at zero cost. Signed-off-by: David S. 
Miller --- net/ipv4/route.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 5090e95..432eee6 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2736,12 +2736,6 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi *flp, } EXPORT_SYMBOL_GPL(ip_route_output_flow); -struct rtable *ip_route_output_key(struct net *net, struct flowi *flp) -{ - return ip_route_output_flow(net, flp, NULL); -} -EXPORT_SYMBOL(ip_route_output_key); - static int rt_fill_info(struct net *net, struct sk_buff *skb, u32 pid, u32 seq, int event, int nowait, unsigned int flags) -- cgit v1.1 From eed84713bc47ce2f7d675914f297ad9b6227a587 Mon Sep 17 00:00:00 2001 From: Shmulik Ravid Date: Sun, 27 Feb 2011 05:04:31 +0000 Subject: dcbnl: add support for retrieving peer configuration - ieee These 2 patches add the support for retrieving the remote or peer DCBX configuration via dcbnl for embedded DCBX stacks. The peer configuration is part of the DCBX MIB and is useful for debugging and diagnostics of the overall DCB configuration. The first patch add this support for IEEE 802.1Qaz standard the second patch add the same support for the older CEE standard. Diff for v2 - the peer-app-info is CEE specific. Signed-off-by: Shmulik Ravid Signed-off-by: David S. Miller --- net/dcb/dcbnl.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'net') diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index d5074a5..2e6dcf2 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1224,6 +1224,54 @@ err: return err; } +static int dcbnl_build_peer_app(struct net_device *netdev, struct sk_buff* skb) +{ + struct dcb_peer_app_info info; + struct dcb_app *table = NULL; + const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; + u16 app_count; + int err; + + + /** + * retrieve the peer app configuration form the driver. 
If the driver + * handlers fail exit without doing anything + */ + err = ops->peer_getappinfo(netdev, &info, &app_count); + if (!err && app_count) { + table = kmalloc(sizeof(struct dcb_app) * app_count, GFP_KERNEL); + if (!table) + return -ENOMEM; + + err = ops->peer_getapptable(netdev, table); + } + + if (!err) { + u16 i; + struct nlattr *app; + + /** + * build the message, from here on the only possible failure + * is due to the skb size + */ + err = -EMSGSIZE; + + app = nla_nest_start(skb, DCB_ATTR_IEEE_PEER_APP); + if (!app) + goto nla_put_failure; + + for (i = 0; i < app_count; i++) + NLA_PUT(skb, DCB_ATTR_IEEE_APP, sizeof(struct dcb_app), + &table[i]); + + nla_nest_end(skb, app); + } + err = 0; + +nla_put_failure: + kfree(table); + return err; +} /* Handle IEEE 802.1Qaz GET commands. */ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb, @@ -1288,6 +1336,27 @@ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb, spin_unlock(&dcb_lock); nla_nest_end(skb, app); + /* get peer info if available */ + if (ops->ieee_peer_getets) { + struct ieee_ets ets; + err = ops->ieee_peer_getets(netdev, &ets); + if (!err) + NLA_PUT(skb, DCB_ATTR_IEEE_PEER_ETS, sizeof(ets), &ets); + } + + if (ops->ieee_peer_getpfc) { + struct ieee_pfc pfc; + err = ops->ieee_peer_getpfc(netdev, &pfc); + if (!err) + NLA_PUT(skb, DCB_ATTR_IEEE_PEER_PFC, sizeof(pfc), &pfc); + } + + if (ops->peer_getappinfo && ops->peer_getapptable) { + err = dcbnl_build_peer_app(netdev, skb); + if (err) + goto nla_put_failure; + } + nla_nest_end(skb, ieee); nlmsg_end(skb, nlh); -- cgit v1.1 From dc6ed1df5a5f84e45e77e2acb6fd99b995414956 Mon Sep 17 00:00:00 2001 From: Shmulik Ravid Date: Sun, 27 Feb 2011 05:04:38 +0000 Subject: dcbnl: add support for retrieving peer configuration - cee This patch adds the support for retrieving the remote or peer DCBX configuration via dcbnl for embedded DCBX stacks supporting the CEE DCBX standard. 
Signed-off-by: Shmulik Ravid Signed-off-by: David S. Miller --- net/dcb/dcbnl.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 81 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 2e6dcf2..d8b4f72 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1224,7 +1224,9 @@ err: return err; } -static int dcbnl_build_peer_app(struct net_device *netdev, struct sk_buff* skb) +static int dcbnl_build_peer_app(struct net_device *netdev, struct sk_buff* skb, + int app_nested_type, int app_info_type, + int app_entry_type) { struct dcb_peer_app_info info; struct dcb_app *table = NULL; @@ -1256,12 +1258,15 @@ static int dcbnl_build_peer_app(struct net_device *netdev, struct sk_buff* skb) */ err = -EMSGSIZE; - app = nla_nest_start(skb, DCB_ATTR_IEEE_PEER_APP); + app = nla_nest_start(skb, app_nested_type); if (!app) goto nla_put_failure; + if (app_info_type) + NLA_PUT(skb, app_info_type, sizeof(info), &info); + for (i = 0; i < app_count; i++) - NLA_PUT(skb, DCB_ATTR_IEEE_APP, sizeof(struct dcb_app), + NLA_PUT(skb, app_entry_type, sizeof(struct dcb_app), &table[i]); nla_nest_end(skb, app); @@ -1352,7 +1357,10 @@ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb, } if (ops->peer_getappinfo && ops->peer_getapptable) { - err = dcbnl_build_peer_app(netdev, skb); + err = dcbnl_build_peer_app(netdev, skb, + DCB_ATTR_IEEE_PEER_APP, + DCB_ATTR_IEEE_APP_UNSPEC, + DCB_ATTR_IEEE_APP); if (err) goto nla_put_failure; } @@ -1510,6 +1518,71 @@ err: return ret; } +/* Handle CEE DCBX GET commands. 
*/ +static int dcbnl_cee_get(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct sk_buff *skb; + struct nlmsghdr *nlh; + struct dcbmsg *dcb; + struct nlattr *cee; + const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; + int err; + + if (!ops) + return -EOPNOTSUPP; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + nlh = NLMSG_NEW(skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = DCB_CMD_CEE_GET; + + NLA_PUT_STRING(skb, DCB_ATTR_IFNAME, netdev->name); + + cee = nla_nest_start(skb, DCB_ATTR_CEE); + if (!cee) + goto nla_put_failure; + + /* get peer info if available */ + if (ops->cee_peer_getpg) { + struct cee_pg pg; + err = ops->cee_peer_getpg(netdev, &pg); + if (!err) + NLA_PUT(skb, DCB_ATTR_CEE_PEER_PG, sizeof(pg), &pg); + } + + if (ops->cee_peer_getpfc) { + struct cee_pfc pfc; + err = ops->cee_peer_getpfc(netdev, &pfc); + if (!err) + NLA_PUT(skb, DCB_ATTR_CEE_PEER_PFC, sizeof(pfc), &pfc); + } + + if (ops->peer_getappinfo && ops->peer_getapptable) { + err = dcbnl_build_peer_app(netdev, skb, + DCB_ATTR_CEE_PEER_APP_TABLE, + DCB_ATTR_CEE_PEER_APP_INFO, + DCB_ATTR_CEE_PEER_APP); + if (err) + goto nla_put_failure; + } + + nla_nest_end(skb, cee); + nlmsg_end(skb, nlh); + + return rtnl_unicast(skb, &init_net, pid); +nla_put_failure: + nlmsg_cancel(skb, nlh); +nlmsg_failure: + kfree_skb(skb); + return -1; +} + static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); @@ -1639,6 +1712,10 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ret = dcbnl_setfeatcfg(netdev, tb, pid, nlh->nlmsg_seq, nlh->nlmsg_flags); goto out; + case DCB_CMD_CEE_GET: + ret = dcbnl_cee_get(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; default: goto errout; } -- cgit v1.1 From 06dc94b1ed05f91e246315afeb1c652d6d0dc9ab Mon Sep 17 00:00:00 2001 From: "David 
S. Miller" Date: Thu, 3 Mar 2011 10:38:01 -0800 Subject: ipv4: Fix crash in dst_release when udp_sendmsg route lookup fails. As reported by Eric: [11483.697233] IP: [] dst_release+0x18/0x60 ... [11483.697741] Call Trace: [11483.697764] [] udp_sendmsg+0x282/0x6e0 [11483.697790] [] ? memcpy_toiovec+0x51/0x70 [11483.697818] [] ? ip_generic_getfrag+0x0/0xb0 The pointer passed to dst_release() is -EINVAL, that's because we leave an error pointer in the local variable "rt" by accident. NULL it out to fix the bug. Reported-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 95e0c2c..c9a73e5 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -925,6 +925,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, rt = ip_route_output_flow(net, &fl, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); + rt = NULL; if (err == -ENETUNREACH) IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); goto out; -- cgit v1.1 From c53fa1ed92cd671a1dfb1e7569e9ab672612ddc6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 3 Mar 2011 10:55:40 -0800 Subject: netlink: kill loginuid/sessionid/sid members from struct netlink_skb_parms Netlink message processing in the kernel is synchronous these days, the session information can be collected when needed. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/netlabel/netlabel_user.h | 6 ++--- net/netlink/af_netlink.c | 3 --- net/xfrm/xfrm_user.c | 56 +++++++++++++++++++++++++------------------- 3 files changed, 35 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h index 6caef8b..f4fc4c9 100644 --- a/net/netlabel/netlabel_user.h +++ b/net/netlabel/netlabel_user.h @@ -49,9 +49,9 @@ static inline void netlbl_netlink_auditinfo(struct sk_buff *skb, struct netlbl_audit *audit_info) { - audit_info->secid = NETLINK_CB(skb).sid; - audit_info->loginuid = NETLINK_CB(skb).loginuid; - audit_info->sessionid = NETLINK_CB(skb).sessionid; + security_task_getsecid(current, &audit_info->secid); + audit_info->loginuid = audit_get_loginuid(current); + audit_info->sessionid = audit_get_sessionid(current); } /* NetLabel NETLINK I/O functions */ diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 478181d..97ecd92 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1362,9 +1362,6 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, NETLINK_CB(skb).pid = nlk->pid; NETLINK_CB(skb).dst_group = dst_group; - NETLINK_CB(skb).loginuid = audit_get_loginuid(current); - NETLINK_CB(skb).sessionid = audit_get_sessionid(current); - security_task_getsecid(current, &(NETLINK_CB(skb).sid)); memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); /* What can I do? 
Netlink is asynchronous, so that diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 673698d..468ab60 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -497,9 +497,9 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_state *x; int err; struct km_event c; - uid_t loginuid = NETLINK_CB(skb).loginuid; - u32 sessionid = NETLINK_CB(skb).sessionid; - u32 sid = NETLINK_CB(skb).sid; + uid_t loginuid = audit_get_loginuid(current); + u32 sessionid = audit_get_sessionid(current); + u32 sid; err = verify_newsa_info(p, attrs); if (err) @@ -515,6 +515,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, else err = xfrm_state_update(x); + security_task_getsecid(current, &sid); xfrm_audit_state_add(x, err ? 0 : 1, loginuid, sessionid, sid); if (err < 0) { @@ -575,9 +576,9 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, int err = -ESRCH; struct km_event c; struct xfrm_usersa_id *p = nlmsg_data(nlh); - uid_t loginuid = NETLINK_CB(skb).loginuid; - u32 sessionid = NETLINK_CB(skb).sessionid; - u32 sid = NETLINK_CB(skb).sid; + uid_t loginuid = audit_get_loginuid(current); + u32 sessionid = audit_get_sessionid(current); + u32 sid; x = xfrm_user_state_lookup(net, p, attrs, &err); if (x == NULL) @@ -602,6 +603,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, km_state_notify(x, &c); out: + security_task_getsecid(current, &sid); xfrm_audit_state_delete(x, err ? 
0 : 1, loginuid, sessionid, sid); xfrm_state_put(x); return err; @@ -1265,9 +1267,9 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct km_event c; int err; int excl; - uid_t loginuid = NETLINK_CB(skb).loginuid; - u32 sessionid = NETLINK_CB(skb).sessionid; - u32 sid = NETLINK_CB(skb).sid; + uid_t loginuid = audit_get_loginuid(current); + u32 sessionid = audit_get_sessionid(current); + u32 sid; err = verify_newpolicy_info(p); if (err) @@ -1286,6 +1288,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, * a type XFRM_MSG_UPDPOLICY - JHS */ excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY; err = xfrm_policy_insert(p->dir, xp, excl); + security_task_getsecid(current, &sid); xfrm_audit_policy_add(xp, err ? 0 : 1, loginuid, sessionid, sid); if (err) { @@ -1522,10 +1525,11 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, NETLINK_CB(skb).pid); } } else { - uid_t loginuid = NETLINK_CB(skb).loginuid; - u32 sessionid = NETLINK_CB(skb).sessionid; - u32 sid = NETLINK_CB(skb).sid; + uid_t loginuid = audit_get_loginuid(current); + u32 sessionid = audit_get_sessionid(current); + u32 sid; + security_task_getsecid(current, &sid); xfrm_audit_policy_delete(xp, err ? 
0 : 1, loginuid, sessionid, sid); @@ -1553,9 +1557,9 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_audit audit_info; int err; - audit_info.loginuid = NETLINK_CB(skb).loginuid; - audit_info.sessionid = NETLINK_CB(skb).sessionid; - audit_info.secid = NETLINK_CB(skb).sid; + audit_info.loginuid = audit_get_loginuid(current); + audit_info.sessionid = audit_get_sessionid(current); + security_task_getsecid(current, &audit_info.secid); err = xfrm_state_flush(net, p->proto, &audit_info); if (err) { if (err == -ESRCH) /* empty table */ @@ -1720,9 +1724,9 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; - audit_info.loginuid = NETLINK_CB(skb).loginuid; - audit_info.sessionid = NETLINK_CB(skb).sessionid; - audit_info.secid = NETLINK_CB(skb).sid; + audit_info.loginuid = audit_get_loginuid(current); + audit_info.sessionid = audit_get_sessionid(current); + security_task_getsecid(current, &audit_info.secid); err = xfrm_policy_flush(net, type, &audit_info); if (err) { if (err == -ESRCH) /* empty table */ @@ -1789,9 +1793,11 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, err = 0; if (up->hard) { - uid_t loginuid = NETLINK_CB(skb).loginuid; - uid_t sessionid = NETLINK_CB(skb).sessionid; - u32 sid = NETLINK_CB(skb).sid; + uid_t loginuid = audit_get_loginuid(current); + u32 sessionid = audit_get_sessionid(current); + u32 sid; + + security_task_getsecid(current, &sid); xfrm_policy_delete(xp, p->dir); xfrm_audit_policy_delete(xp, 1, loginuid, sessionid, sid); @@ -1830,9 +1836,11 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, km_state_expired(x, ue->hard, current->pid); if (ue->hard) { - uid_t loginuid = NETLINK_CB(skb).loginuid; - uid_t sessionid = NETLINK_CB(skb).sessionid; - u32 sid = NETLINK_CB(skb).sid; + uid_t loginuid = audit_get_loginuid(current); + u32 sessionid = audit_get_sessionid(current); + u32 sid; + + security_task_getsecid(current, 
&sid); __xfrm_state_delete(x); xfrm_audit_state_delete(x, 1, loginuid, sessionid, sid); } -- cgit v1.1 From d276055c4e90a7278cd5167ba9755c9b214bcff7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 Mar 2011 11:10:02 -0800 Subject: net_sched: reduce fifo qdisc size Because of various alignments [SLUB / qdisc], we use 512 bytes of memory for one {p|b}fifo qdisc, instead of 256 bytes on 64bit arches and 192 bytes on 32bit ones. Move the "u32 limit" inside "struct Qdisc" (no impact on other qdiscs) Change qdisc_alloc(), first trying a regular allocation before an oversized one. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_fifo.c | 34 +++++++++++----------------------- net/sched/sch_generic.c | 18 +++++++++++------- 2 files changed, 22 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index be33f9d..66effe2 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -19,15 +19,9 @@ /* 1 band FIFO pseudo-"scheduler" */ -struct fifo_sched_data { - u32 limit; -}; - static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch) { - struct fifo_sched_data *q = qdisc_priv(sch); - - if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= q->limit)) + if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit)) return qdisc_enqueue_tail(skb, sch); return qdisc_reshape_fail(skb, sch); @@ -35,9 +29,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch) static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch) { - struct fifo_sched_data *q = qdisc_priv(sch); - - if (likely(skb_queue_len(&sch->q) < q->limit)) + if (likely(skb_queue_len(&sch->q) < sch->limit)) return qdisc_enqueue_tail(skb, sch); return qdisc_reshape_fail(skb, sch); @@ -45,9 +37,7 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch) static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch) { - struct fifo_sched_data *q = qdisc_priv(sch); - - if 
(likely(skb_queue_len(&sch->q) < q->limit)) + if (likely(skb_queue_len(&sch->q) < sch->limit)) return qdisc_enqueue_tail(skb, sch); /* queue full, remove one skb to fulfill the limit */ @@ -60,7 +50,6 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch) static int fifo_init(struct Qdisc *sch, struct nlattr *opt) { - struct fifo_sched_data *q = qdisc_priv(sch); bool bypass; bool is_bfifo = sch->ops == &bfifo_qdisc_ops; @@ -70,20 +59,20 @@ static int fifo_init(struct Qdisc *sch, struct nlattr *opt) if (is_bfifo) limit *= psched_mtu(qdisc_dev(sch)); - q->limit = limit; + sch->limit = limit; } else { struct tc_fifo_qopt *ctl = nla_data(opt); if (nla_len(opt) < sizeof(*ctl)) return -EINVAL; - q->limit = ctl->limit; + sch->limit = ctl->limit; } if (is_bfifo) - bypass = q->limit >= psched_mtu(qdisc_dev(sch)); + bypass = sch->limit >= psched_mtu(qdisc_dev(sch)); else - bypass = q->limit >= 1; + bypass = sch->limit >= 1; if (bypass) sch->flags |= TCQ_F_CAN_BYPASS; @@ -94,8 +83,7 @@ static int fifo_init(struct Qdisc *sch, struct nlattr *opt) static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb) { - struct fifo_sched_data *q = qdisc_priv(sch); - struct tc_fifo_qopt opt = { .limit = q->limit }; + struct tc_fifo_qopt opt = { .limit = sch->limit }; NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); return skb->len; @@ -106,7 +94,7 @@ nla_put_failure: struct Qdisc_ops pfifo_qdisc_ops __read_mostly = { .id = "pfifo", - .priv_size = sizeof(struct fifo_sched_data), + .priv_size = 0, .enqueue = pfifo_enqueue, .dequeue = qdisc_dequeue_head, .peek = qdisc_peek_head, @@ -121,7 +109,7 @@ EXPORT_SYMBOL(pfifo_qdisc_ops); struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { .id = "bfifo", - .priv_size = sizeof(struct fifo_sched_data), + .priv_size = 0, .enqueue = bfifo_enqueue, .dequeue = qdisc_dequeue_head, .peek = qdisc_peek_head, @@ -136,7 +124,7 @@ EXPORT_SYMBOL(bfifo_qdisc_ops); struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = { .id = "pfifo_head_drop", - 
.priv_size = sizeof(struct fifo_sched_data), + .priv_size = 0, .enqueue = pfifo_tail_enqueue, .dequeue = qdisc_dequeue_head, .peek = qdisc_peek_head, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 0da09d5..a854cab 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -550,21 +550,25 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, { void *p; struct Qdisc *sch; - unsigned int size; + unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size; int err = -ENOBUFS; - /* ensure that the Qdisc and the private data are 64-byte aligned */ - size = QDISC_ALIGN(sizeof(*sch)); - size += ops->priv_size + (QDISC_ALIGNTO - 1); - p = kzalloc_node(size, GFP_KERNEL, netdev_queue_numa_node_read(dev_queue)); if (!p) goto errout; sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); - sch->padded = (char *) sch - (char *) p; - + /* if we got non aligned memory, ask more and do alignment ourself */ + if (sch != p) { + kfree(p); + p = kzalloc_node(size + QDISC_ALIGNTO - 1, GFP_KERNEL, + netdev_queue_numa_node_read(dev_queue)); + if (!p) + goto errout; + sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); + sch->padded = (char *) sch - (char *) p; + } INIT_LIST_HEAD(&sch->list); skb_queue_head_init(&sch->q); spin_lock_init(&sch->busylock); -- cgit v1.1 From e066008b38ca9ace1b6de8dbbac8ed460640791d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 3 Mar 2011 11:24:19 -0800 Subject: ipv4: Fix __ip_dev_find() to use ifa_local instead of ifa_address. Reported-by: Stephen Hemminger Reported-by: Julian Anastasov Signed-off-by: David S. 
Miller --- net/ipv4/devinet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 9038928..ff53860 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -111,7 +111,7 @@ static inline unsigned int inet_addr_hash(struct net *net, __be32 addr) static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa) { - unsigned int hash = inet_addr_hash(net, ifa->ifa_address); + unsigned int hash = inet_addr_hash(net, ifa->ifa_local); spin_lock(&inet_addr_hash_lock); hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]); @@ -146,7 +146,7 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) if (!net_eq(dev_net(dev), net)) continue; - if (ifa->ifa_address == addr) { + if (ifa->ifa_local == addr) { result = dev; break; } -- cgit v1.1 From 29546a6404e3a4b5d13f0a9586eb5cf1c3b25167 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 3 Mar 2011 12:10:37 -0800 Subject: ipv6: Use ERR_CAST in addrconf_dst_alloc. Signed-off-by: David S. Miller --- net/ipv6/route.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 053a92e..59f2a58 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2022,12 +2022,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, if (IS_ERR(neigh)) { dst_free(&rt->dst); - /* We are casting this because that is the return - * value type. But an errno encoded pointer is the - * same regardless of the underlying pointer type, - * and that's what we are returning. So this is OK. 
- */ - return (struct rt6_info *) neigh; + return ERR_CAST(neigh); } rt->rt6i_nexthop = neigh; -- cgit v1.1 From 01a16b21d6adf992aa863186c3c4e561a57c1714 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 3 Mar 2011 13:32:07 -0800 Subject: netlink: kill eff_cap from struct netlink_skb_parms Netlink message processing in the kernel is synchronous these days, capabilities can be checked directly in security_netlink_recv() from the current process. Signed-off-by: Patrick McHardy Reviewed-by: James Morris [chrisw: update to include pohmelfs and uvesafb] Signed-off-by: Chris Wright Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 97ecd92..a808fb1 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1364,12 +1364,6 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, NETLINK_CB(skb).dst_group = dst_group; memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); - /* What can I do? Netlink is asynchronous, so that - we will have to save current capabilities to - check them, when this message will be delivered - to corresponding kernel module. --ANK (980802) - */ - err = -EFAULT; if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { kfree_skb(skb); -- cgit v1.1 From 9d468d2269b64222a706f52b965998ee64d0b4bf Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Fri, 4 Mar 2011 13:31:31 +0100 Subject: mac80211: Remove redundant preamble and RTS flag setup in minstrel_ht mac80211 does the same afterwards anyway. Hence, just drop this redundant code. Signed-off-by: Helmut Schaa Acked-by: Felix Fietkau Signed-off-by: John W. 
Linville --- net/mac80211/rc80211_minstrel_ht.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 165a451..775cf15 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -519,9 +519,7 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, rate->count = mr->retry_count; rate->flags = IEEE80211_TX_RC_MCS | group->flags; - if (txrc->short_preamble) - rate->flags |= IEEE80211_TX_RC_USE_SHORT_PREAMBLE; - if (txrc->rts || rtscts) + if (rtscts) rate->flags |= IEEE80211_TX_RC_USE_RTS_CTS; rate->idx = index % MCS_GROUP_RATES + (group->streams - 1) * MCS_GROUP_RATES; } -- cgit v1.1 From 65e8354ec13a45414045084166cb340c0d7ffe8a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 4 Mar 2011 14:33:59 -0800 Subject: inetpeer: seqlock optimization David noticed : ------------------ Eric, I was profiling the non-routing-cache case and something that stuck out is the case of calling inet_getpeer() with create==0. If an entry is not found, we have to redo the lookup under a spinlock to make certain that a concurrent writer rebalancing the tree does not "hide" an existing entry from us. This makes the case of a create==0 lookup for a not-present entry really expensive. It is on the order of 600 cpu cycles on my Niagara2. I added a hack to not do the relookup under the lock when create==0 and it now costs less than 300 cycles. This is now a pretty common operation with the way we handle COW'd metrics, so I think it's definitely worth optimizing. ----------------- One solution is to use a seqlock instead of a spinlock to protect struct inet_peer_base. After a failed avl tree lookup, we can easily detect if a writer did some changes during our lookup. Taking the lock and redo the lookup is only necessary in this case. 
Note: Add one private rcu_deref_locked() macro to place in one spot the access to spinlock included in seqlock. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inetpeer.c | 75 +++++++++++++++++++++++++---------------------------- 1 file changed, 35 insertions(+), 40 deletions(-) (limited to 'net') diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 48f8d45..f604ffd 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -81,19 +81,19 @@ static const struct inet_peer peer_fake_node = { struct inet_peer_base { struct inet_peer __rcu *root; - spinlock_t lock; + seqlock_t lock; int total; }; static struct inet_peer_base v4_peers = { .root = peer_avl_empty_rcu, - .lock = __SPIN_LOCK_UNLOCKED(v4_peers.lock), + .lock = __SEQLOCK_UNLOCKED(v4_peers.lock), .total = 0, }; static struct inet_peer_base v6_peers = { .root = peer_avl_empty_rcu, - .lock = __SPIN_LOCK_UNLOCKED(v6_peers.lock), + .lock = __SEQLOCK_UNLOCKED(v6_peers.lock), .total = 0, }; @@ -177,6 +177,9 @@ static int addr_compare(const struct inetpeer_addr *a, return 0; } +#define rcu_deref_locked(X, BASE) \ + rcu_dereference_protected(X, lockdep_is_held(&(BASE)->lock.lock)) + /* * Called with local BH disabled and the pool lock held. 
*/ @@ -187,8 +190,7 @@ static int addr_compare(const struct inetpeer_addr *a, \ stackptr = _stack; \ *stackptr++ = &_base->root; \ - for (u = rcu_dereference_protected(_base->root, \ - lockdep_is_held(&_base->lock)); \ + for (u = rcu_deref_locked(_base->root, _base); \ u != peer_avl_empty; ) { \ int cmp = addr_compare(_daddr, &u->daddr); \ if (cmp == 0) \ @@ -198,8 +200,7 @@ static int addr_compare(const struct inetpeer_addr *a, else \ v = &u->avl_right; \ *stackptr++ = v; \ - u = rcu_dereference_protected(*v, \ - lockdep_is_held(&_base->lock)); \ + u = rcu_deref_locked(*v, _base); \ } \ u; \ }) @@ -246,13 +247,11 @@ static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr, struct inet_peer __rcu **v; \ *stackptr++ = &start->avl_left; \ v = &start->avl_left; \ - for (u = rcu_dereference_protected(*v, \ - lockdep_is_held(&base->lock)); \ + for (u = rcu_deref_locked(*v, base); \ u->avl_right != peer_avl_empty_rcu; ) { \ v = &u->avl_right; \ *stackptr++ = v; \ - u = rcu_dereference_protected(*v, \ - lockdep_is_held(&base->lock)); \ + u = rcu_deref_locked(*v, base); \ } \ u; \ }) @@ -271,21 +270,16 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], while (stackend > stack) { nodep = *--stackend; - node = rcu_dereference_protected(*nodep, - lockdep_is_held(&base->lock)); - l = rcu_dereference_protected(node->avl_left, - lockdep_is_held(&base->lock)); - r = rcu_dereference_protected(node->avl_right, - lockdep_is_held(&base->lock)); + node = rcu_deref_locked(*nodep, base); + l = rcu_deref_locked(node->avl_left, base); + r = rcu_deref_locked(node->avl_right, base); lh = node_height(l); rh = node_height(r); if (lh > rh + 1) { /* l: RH+2 */ struct inet_peer *ll, *lr, *lrl, *lrr; int lrh; - ll = rcu_dereference_protected(l->avl_left, - lockdep_is_held(&base->lock)); - lr = rcu_dereference_protected(l->avl_right, - lockdep_is_held(&base->lock)); + ll = rcu_deref_locked(l->avl_left, base); + lr = rcu_deref_locked(l->avl_right, base); lrh = 
node_height(lr); if (lrh <= node_height(ll)) { /* ll: RH+1 */ RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */ @@ -296,10 +290,8 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], l->avl_height = node->avl_height + 1; RCU_INIT_POINTER(*nodep, l); } else { /* ll: RH, lr: RH+1 */ - lrl = rcu_dereference_protected(lr->avl_left, - lockdep_is_held(&base->lock)); /* lrl: RH or RH-1 */ - lrr = rcu_dereference_protected(lr->avl_right, - lockdep_is_held(&base->lock)); /* lrr: RH or RH-1 */ + lrl = rcu_deref_locked(lr->avl_left, base);/* lrl: RH or RH-1 */ + lrr = rcu_deref_locked(lr->avl_right, base);/* lrr: RH or RH-1 */ RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */ RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ node->avl_height = rh + 1; /* node: RH+1 */ @@ -314,10 +306,8 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], } else if (rh > lh + 1) { /* r: LH+2 */ struct inet_peer *rr, *rl, *rlr, *rll; int rlh; - rr = rcu_dereference_protected(r->avl_right, - lockdep_is_held(&base->lock)); - rl = rcu_dereference_protected(r->avl_left, - lockdep_is_held(&base->lock)); + rr = rcu_deref_locked(r->avl_right, base); + rl = rcu_deref_locked(r->avl_left, base); rlh = node_height(rl); if (rlh <= node_height(rr)) { /* rr: LH+1 */ RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */ @@ -328,10 +318,8 @@ static void peer_avl_rebalance(struct inet_peer __rcu **stack[], r->avl_height = node->avl_height + 1; RCU_INIT_POINTER(*nodep, r); } else { /* rr: RH, rl: RH+1 */ - rlr = rcu_dereference_protected(rl->avl_right, - lockdep_is_held(&base->lock)); /* rlr: LH or LH-1 */ - rll = rcu_dereference_protected(rl->avl_left, - lockdep_is_held(&base->lock)); /* rll: LH or LH-1 */ + rlr = rcu_deref_locked(rl->avl_right, base);/* rlr: LH or LH-1 */ + rll = rcu_deref_locked(rl->avl_left, base);/* rll: LH or LH-1 */ RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */ RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ 
node->avl_height = lh + 1; /* node: LH+1 */ @@ -372,7 +360,7 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base) do_free = 0; - spin_lock_bh(&base->lock); + write_seqlock_bh(&base->lock); /* Check the reference counter. It was artificially incremented by 1 * in cleanup() function to prevent sudden disappearing. If we can * atomically (because of lockless readers) take this last reference, @@ -392,8 +380,7 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base) /* look for a node to insert instead of p */ struct inet_peer *t; t = lookup_rightempty(p, base); - BUG_ON(rcu_dereference_protected(*stackptr[-1], - lockdep_is_held(&base->lock)) != t); + BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t); **--stackptr = t->avl_left; /* t is removed, t->daddr > x->daddr for any * x in p->avl_left subtree. @@ -409,7 +396,7 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base) base->total--; do_free = 1; } - spin_unlock_bh(&base->lock); + write_sequnlock_bh(&base->lock); if (do_free) call_rcu_bh(&p->rcu, inetpeer_free_rcu); @@ -477,12 +464,16 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; struct inet_peer_base *base = family_to_base(daddr->family); struct inet_peer *p; + unsigned int sequence; + int invalidated; /* Look up for the address quickly, lockless. * Because of a concurrent writer, we might not find an existing entry. */ rcu_read_lock_bh(); + sequence = read_seqbegin(&base->lock); p = lookup_rcu_bh(daddr, base); + invalidated = read_seqretry(&base->lock, sequence); rcu_read_unlock_bh(); if (p) { @@ -493,14 +484,18 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) return p; } + /* If no writer did a change during our lookup, we can return early. */ + if (!create && !invalidated) + return NULL; + /* retry an exact lookup, taking the lock before. 
* At least, nodes should be hot in our cache. */ - spin_lock_bh(&base->lock); + write_seqlock_bh(&base->lock); p = lookup(daddr, stack, base); if (p != peer_avl_empty) { atomic_inc(&p->refcnt); - spin_unlock_bh(&base->lock); + write_sequnlock_bh(&base->lock); /* Remove the entry from unused list if it was there. */ unlink_from_unused(p); return p; @@ -524,7 +519,7 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) link_to_pool(p, base); base->total++; } - spin_unlock_bh(&base->lock); + write_sequnlock_bh(&base->lock); if (base->total >= inet_peer_threshold) /* Remove one less-recently-used entry. */ -- cgit v1.1 From 44713b67db10c774f14280c129b0d5fd13c70cf2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 4 Mar 2011 21:24:47 -0800 Subject: ipv4: Optimize flow initialization in output route lookup. We burn a lot of useless cycles, cpu store buffer traffic, and memory operations memset()'ing the on-stack flow used to perform output route lookups in __ip_route_output_key(). Only the first half of the flow object members even matter for output route lookups in this context, specifically: FIB rules matching cares about: dst, src, tos, iif, oif, mark FIB trie lookup cares about: dst FIB semantic match cares about: tos, scope, oif Therefore only initialize these specific members and elide the memset entirely. On Niagara2 this kills about ~300 cycles from the output route lookup path. Likely, we can take things further, since all callers of output route lookups essentially throw away the on-stack flow they use. So they don't care if we use it as a scratch-pad to compute the final flow key. Signed-off-by: David S. 
Miller Acked-by: Eric Dumazet --- net/ipv4/route.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 432eee6..6c87403 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2431,14 +2431,7 @@ static struct rtable *ip_route_output_slow(struct net *net, const struct flowi *oldflp) { u32 tos = RT_FL_TOS(oldflp); - struct flowi fl = { .fl4_dst = oldflp->fl4_dst, - .fl4_src = oldflp->fl4_src, - .fl4_tos = tos & IPTOS_RT_MASK, - .fl4_scope = ((tos & RTO_ONLINK) ? - RT_SCOPE_LINK : RT_SCOPE_UNIVERSE), - .mark = oldflp->mark, - .iif = net->loopback_dev->ifindex, - .oif = oldflp->oif }; + struct flowi fl; struct fib_result res; unsigned int flags = 0; struct net_device *dev_out = NULL; @@ -2449,6 +2442,15 @@ static struct rtable *ip_route_output_slow(struct net *net, res.r = NULL; #endif + fl.oif = oldflp->oif; + fl.iif = net->loopback_dev->ifindex; + fl.mark = oldflp->mark; + fl.fl4_dst = oldflp->fl4_dst; + fl.fl4_src = oldflp->fl4_src; + fl.fl4_tos = tos & IPTOS_RT_MASK; + fl.fl4_scope = ((tos & RTO_ONLINK) ? + RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); + rcu_read_lock(); if (oldflp->fl4_src) { rth = ERR_PTR(-EINVAL); -- cgit v1.1 From 3c0afdca44af795dd315c20cc525927a459abe30 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 4 Mar 2011 21:26:07 -0800 Subject: ipv4: Get peer more cheaply in rt_init_metrics(). We know this is a new route object, so doing atomics and stuff makes no sense at all. Signed-off-by: David S. 
Miller --- net/ipv4/route.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6c87403..9794a2c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1759,9 +1759,9 @@ static void rt_init_metrics(struct rtable *rt, struct fib_info *fi) if (rt->fl.flags & FLOWI_FLAG_PRECOW_METRICS) create = 1; - rt_bind_peer(rt, create); - peer = rt->peer; + rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create); if (peer) { + rt->rt_peer_genid = rt_peer_genid(); if (inet_metrics_new(peer)) memcpy(peer->metrics, fi->fib_metrics, sizeof(u32) * RTAX_MAX); -- cgit v1.1 From 1018b5c01636c7c6bda31a719bda34fc631db29a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 4 Mar 2011 21:35:25 -0800 Subject: ipv4: Set rt->rt_iif more sanely on output routes. rt->rt_iif is only ever inspected on input routes, for example DCCP uses this to populate a route lookup flow key when generating replies to another packet. Therefore, setting it to anything other than zero on output routes makes no sense. Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 9794a2c..602473c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2381,7 +2381,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->fl.mark = oldflp->mark; rth->rt_dst = fl->fl4_dst; rth->rt_src = fl->fl4_src; - rth->rt_iif = oldflp->oif ? : dev_out->ifindex; + rth->rt_iif = 0; /* get references to the devices that are to be hold by the routing cache entry */ rth->dst.dev = dev_out; -- cgit v1.1 From 5e2b61f78411be25f0b84f97d5b5d312f184dfd1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 4 Mar 2011 21:47:09 -0800 Subject: ipv4: Remove flowi from struct rtable. The only necessary parts are the src/dst addresses, the interface indexes, the TOS, and the mark. 
The rest is unnecessary bloat, which amounts to nearly 50 bytes on 64-bit. Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 2 +- net/ipv4/ipmr.c | 52 ++++++++++++---- net/ipv4/route.c | 153 ++++++++++++++++++++++++++---------------------- net/ipv4/xfrm4_policy.c | 7 ++- net/sched/cls_route.c | 2 +- net/sched/em_meta.c | 2 +- 6 files changed, 133 insertions(+), 85 deletions(-) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 994a785..1771ce6 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -563,7 +563,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) rcu_read_lock(); if (rt_is_input_route(rt) && net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) - dev = dev_get_by_index_rcu(net, rt->fl.iif); + dev = dev_get_by_index_rcu(net, rt->rt_iif); if (dev) saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 26ca2f2..9d5f634 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1813,12 +1813,22 @@ int ip_mr_input(struct sk_buff *skb) if (IPCB(skb)->flags & IPSKB_FORWARDED) goto dont_forward; - err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt); - if (err < 0) { - kfree_skb(skb); - return err; + { + struct rtable *rt = skb_rtable(skb); + struct flowi fl = { + .fl4_dst = rt->rt_key_dst, + .fl4_src = rt->rt_key_src, + .fl4_tos = rt->rt_tos, + .oif = rt->rt_oif, + .iif = rt->rt_iif, + .mark = rt->rt_mark, + }; + err = ipmr_fib_lookup(net, &fl, &mrt); + if (err < 0) { + kfree_skb(skb); + return err; + } } - if (!local) { if (IPCB(skb)->opt.router_alert) { if (ip_call_ra_chain(skb)) @@ -1946,9 +1956,19 @@ int pim_rcv_v1(struct sk_buff *skb) pim = igmp_hdr(skb); - if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0) - goto drop; - + { + struct rtable *rt = skb_rtable(skb); + struct flowi fl = { + .fl4_dst = rt->rt_key_dst, + .fl4_src = rt->rt_key_src, + .fl4_tos = rt->rt_tos, + .oif = rt->rt_oif, + .iif = rt->rt_iif, + .mark = rt->rt_mark, + }; + if 
(ipmr_fib_lookup(net, &fl, &mrt) < 0) + goto drop; + } if (!mrt->mroute_do_pim || pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) goto drop; @@ -1978,9 +1998,19 @@ static int pim_rcv(struct sk_buff *skb) csum_fold(skb_checksum(skb, 0, skb->len, 0)))) goto drop; - if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0) - goto drop; - + { + struct rtable *rt = skb_rtable(skb); + struct flowi fl = { + .fl4_dst = rt->rt_key_dst, + .fl4_src = rt->rt_key_src, + .fl4_tos = rt->rt_tos, + .oif = rt->rt_oif, + .iif = rt->rt_iif, + .mark = rt->rt_mark, + }; + if (ipmr_fib_lookup(net, &fl, &mrt) < 0) + goto drop; + } if (__pim_rcv(mrt, skb, sizeof(*pim))) { drop: kfree_skb(skb); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 602473c..92a24ea 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -424,7 +424,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) dst_metric(&r->dst, RTAX_WINDOW), (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + dst_metric(&r->dst, RTAX_RTTVAR)), - r->fl.fl4_tos, + r->rt_tos, r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, r->dst.hh ? 
(r->dst.hh->hh_output == dev_queue_xmit) : 0, @@ -711,22 +711,22 @@ static inline bool rt_caching(const struct net *net) net->ipv4.sysctl_rt_cache_rebuild_count; } -static inline bool compare_hash_inputs(const struct flowi *fl1, - const struct flowi *fl2) +static inline bool compare_hash_inputs(const struct rtable *rt1, + const struct rtable *rt2) { - return ((((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) | - ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) | - (fl1->iif ^ fl2->iif)) == 0); + return ((((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) | + ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | + (rt1->rt_iif ^ rt2->rt_iif)) == 0); } -static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) +static inline int compare_keys(struct rtable *rt1, struct rtable *rt2) { - return (((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) | - ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) | - (fl1->mark ^ fl2->mark) | - (*(u16 *)&fl1->fl4_tos ^ *(u16 *)&fl2->fl4_tos) | - (fl1->oif ^ fl2->oif) | - (fl1->iif ^ fl2->iif)) == 0; + return (((__force u32)rt1->rt_key_dst ^ (__force u32)rt2->rt_key_dst) | + ((__force u32)rt1->rt_key_src ^ (__force u32)rt2->rt_key_src) | + (rt1->rt_mark ^ rt2->rt_mark) | + (rt1->rt_tos ^ rt2->rt_tos) | + (rt1->rt_oif ^ rt2->rt_oif) | + (rt1->rt_iif ^ rt2->rt_iif)) == 0; } static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) @@ -813,7 +813,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth) const struct rtable *aux = head; while (aux != rth) { - if (compare_hash_inputs(&aux->fl, &rth->fl)) + if (compare_hash_inputs(aux, rth)) return 0; aux = rcu_dereference_protected(aux->dst.rt_next, 1); } @@ -1073,7 +1073,7 @@ restart: rt_free(rth); continue; } - if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) { + if (compare_keys(rth, rt) && compare_netns(rth, rt)) { /* Put it first */ *rthp = rth->dst.rt_next; /* @@ -1136,7 +1136,7 
@@ restart: rt_emergency_hash_rebuild(net); spin_unlock_bh(rt_hash_lock_addr(hash)); - hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, + hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, ifindex, rt_genid(net)); goto restart; } @@ -1344,12 +1344,12 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) ip_rt_put(rt); ret = NULL; } else if (rt->rt_flags & RTCF_REDIRECTED) { - unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, - rt->fl.oif, + unsigned hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, + rt->rt_oif, rt_genid(dev_net(dst->dev))); #if RT_CACHE_DEBUG >= 1 printk(KERN_DEBUG "ipv4_negative_advice: redirect to %pI4/%02x dropped\n", - &rt->rt_dst, rt->fl.fl4_tos); + &rt->rt_dst, rt->rt_tos); #endif rt_del(hash, rt); ret = NULL; @@ -1697,8 +1697,17 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) if (rt_is_output_route(rt)) src = rt->rt_src; else { + struct flowi fl = { + .fl4_dst = rt->rt_key_dst, + .fl4_src = rt->rt_key_src, + .fl4_tos = rt->rt_tos, + .oif = rt->rt_oif, + .iif = rt->rt_iif, + .mark = rt->rt_mark, + }; + rcu_read_lock(); - if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) + if (fib_lookup(dev_net(rt->dst.dev), &fl, &res) == 0) src = FIB_RES_PREFSRC(res); else src = inet_select_addr(rt->dst.dev, rt->rt_gateway, @@ -1748,7 +1757,8 @@ static unsigned int ipv4_default_mtu(const struct dst_entry *dst) return mtu; } -static void rt_init_metrics(struct rtable *rt, struct fib_info *fi) +static void rt_init_metrics(struct rtable *rt, const struct flowi *oldflp, + struct fib_info *fi) { struct inet_peer *peer; int create = 0; @@ -1756,7 +1766,7 @@ static void rt_init_metrics(struct rtable *rt, struct fib_info *fi) /* If a peer entry exists for this destination, we must hook * it up in order to get at cached metrics. 
*/ - if (rt->fl.flags & FLOWI_FLAG_PRECOW_METRICS) + if (oldflp && (oldflp->flags & FLOWI_FLAG_PRECOW_METRICS)) create = 1; rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create); @@ -1783,7 +1793,8 @@ static void rt_init_metrics(struct rtable *rt, struct fib_info *fi) } } -static void rt_set_nexthop(struct rtable *rt, const struct fib_result *res, +static void rt_set_nexthop(struct rtable *rt, const struct flowi *oldflp, + const struct fib_result *res, struct fib_info *fi, u16 type, u32 itag) { struct dst_entry *dst = &rt->dst; @@ -1792,7 +1803,7 @@ static void rt_set_nexthop(struct rtable *rt, const struct fib_result *res, if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = FIB_RES_GW(*res); - rt_init_metrics(rt, fi); + rt_init_metrics(rt, oldflp, fi); #ifdef CONFIG_IP_ROUTE_CLASSID dst->tclassid = FIB_RES_NH(*res).nh_tclassid; #endif @@ -1861,20 +1872,19 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->dst.output = ip_rt_bug; - rth->fl.fl4_dst = daddr; + rth->rt_key_dst = daddr; rth->rt_dst = daddr; - rth->fl.fl4_tos = tos; - rth->fl.mark = skb->mark; - rth->fl.fl4_src = saddr; + rth->rt_tos = tos; + rth->rt_mark = skb->mark; + rth->rt_key_src = saddr; rth->rt_src = saddr; #ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; #endif - rth->rt_iif = - rth->fl.iif = dev->ifindex; + rth->rt_iif = dev->ifindex; rth->dst.dev = init_net.loopback_dev; dev_hold(rth->dst.dev); - rth->fl.oif = 0; + rth->rt_oif = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_genid = rt_genid(dev_net(dev)); @@ -1999,25 +2009,24 @@ static int __mkroute_input(struct sk_buff *skb, goto cleanup; } - rth->fl.fl4_dst = daddr; + rth->rt_key_dst = daddr; rth->rt_dst = daddr; - rth->fl.fl4_tos = tos; - rth->fl.mark = skb->mark; - rth->fl.fl4_src = saddr; + rth->rt_tos = tos; + rth->rt_mark = skb->mark; + rth->rt_key_src = saddr; rth->rt_src = saddr; rth->rt_gateway = daddr; - rth->rt_iif = - rth->fl.iif = 
in_dev->dev->ifindex; + rth->rt_iif = in_dev->dev->ifindex; rth->dst.dev = (out_dev)->dev; dev_hold(rth->dst.dev); - rth->fl.oif = 0; + rth->rt_oif = 0; rth->rt_spec_dst= spec_dst; rth->dst.input = ip_forward; rth->dst.output = ip_output; rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); - rt_set_nexthop(rth, res, res->fi, res->type, itag); + rt_set_nexthop(rth, NULL, res, res->fi, res->type, itag); rth->rt_flags = flags; @@ -2172,17 +2181,16 @@ local_input: rth->dst.output= ip_rt_bug; rth->rt_genid = rt_genid(net); - rth->fl.fl4_dst = daddr; + rth->rt_key_dst = daddr; rth->rt_dst = daddr; - rth->fl.fl4_tos = tos; - rth->fl.mark = skb->mark; - rth->fl.fl4_src = saddr; + rth->rt_tos = tos; + rth->rt_mark = skb->mark; + rth->rt_key_src = saddr; rth->rt_src = saddr; #ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; #endif - rth->rt_iif = - rth->fl.iif = dev->ifindex; + rth->rt_iif = dev->ifindex; rth->dst.dev = net->loopback_dev; dev_hold(rth->dst.dev); rth->rt_gateway = daddr; @@ -2261,12 +2269,12 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; rth = rcu_dereference(rth->dst.rt_next)) { - if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | - ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | - (rth->fl.iif ^ iif) | - rth->fl.oif | - (rth->fl.fl4_tos ^ tos)) == 0 && - rth->fl.mark == skb->mark && + if ((((__force u32)rth->rt_key_dst ^ (__force u32)daddr) | + ((__force u32)rth->rt_key_src ^ (__force u32)saddr) | + (rth->rt_iif ^ iif) | + rth->rt_oif | + (rth->rt_tos ^ tos)) == 0 && + rth->rt_mark == skb->mark && net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { if (noref) { @@ -2374,11 +2382,11 @@ static struct rtable *__mkroute_output(const struct fib_result *res, if (!rth) return ERR_PTR(-ENOBUFS); - rth->fl.fl4_dst = oldflp->fl4_dst; - rth->fl.fl4_tos = tos; - rth->fl.fl4_src = oldflp->fl4_src; - rth->fl.oif = oldflp->oif; - rth->fl.mark = 
oldflp->mark; + rth->rt_key_dst = oldflp->fl4_dst; + rth->rt_tos = tos; + rth->rt_key_src = oldflp->fl4_src; + rth->rt_oif = oldflp->oif; + rth->rt_mark = oldflp->mark; rth->rt_dst = fl->fl4_dst; rth->rt_src = fl->fl4_src; rth->rt_iif = 0; @@ -2416,7 +2424,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, #endif } - rt_set_nexthop(rth, res, fi, type, 0); + rt_set_nexthop(rth, oldflp, res, fi, type, 0); rth->rt_flags = flags; return rth; @@ -2629,12 +2637,12 @@ struct rtable *__ip_route_output_key(struct net *net, const struct flowi *flp) rcu_read_lock_bh(); for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; rth = rcu_dereference_bh(rth->dst.rt_next)) { - if (rth->fl.fl4_dst == flp->fl4_dst && - rth->fl.fl4_src == flp->fl4_src && + if (rth->rt_key_dst == flp->fl4_dst && + rth->rt_key_src == flp->fl4_src && rt_is_output_route(rth) && - rth->fl.oif == flp->oif && - rth->fl.mark == flp->mark && - !((rth->fl.fl4_tos ^ flp->fl4_tos) & + rth->rt_oif == flp->oif && + rth->rt_mark == flp->mark && + !((rth->rt_tos ^ flp->fl4_tos) & (IPTOS_RT_MASK | RTO_ONLINK)) && net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { @@ -2693,7 +2701,12 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or if (new->dev) dev_hold(new->dev); - rt->fl = ort->fl; + rt->rt_key_dst = ort->rt_key_dst; + rt->rt_key_src = ort->rt_key_src; + rt->rt_tos = ort->rt_tos; + rt->rt_iif = ort->rt_iif; + rt->rt_oif = ort->rt_oif; + rt->rt_mark = ort->rt_mark; rt->rt_genid = rt_genid(net); rt->rt_flags = ort->rt_flags; @@ -2756,7 +2769,7 @@ static int rt_fill_info(struct net *net, r->rtm_family = AF_INET; r->rtm_dst_len = 32; r->rtm_src_len = 0; - r->rtm_tos = rt->fl.fl4_tos; + r->rtm_tos = rt->rt_tos; r->rtm_table = RT_TABLE_MAIN; NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); r->rtm_type = rt->rt_type; @@ -2768,9 +2781,9 @@ static int rt_fill_info(struct net *net, NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst); - if (rt->fl.fl4_src) { + if 
(rt->rt_key_src) { r->rtm_src_len = 32; - NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); + NLA_PUT_BE32(skb, RTA_SRC, rt->rt_key_src); } if (rt->dst.dev) NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); @@ -2780,7 +2793,7 @@ static int rt_fill_info(struct net *net, #endif if (rt_is_input_route(rt)) NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); - else if (rt->rt_src != rt->fl.fl4_src) + else if (rt->rt_src != rt->rt_key_src) NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src); if (rt->rt_dst != rt->rt_gateway) @@ -2789,8 +2802,8 @@ static int rt_fill_info(struct net *net, if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; - if (rt->fl.mark) - NLA_PUT_BE32(skb, RTA_MARK, rt->fl.mark); + if (rt->rt_mark) + NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark); error = rt->dst.error; expires = (rt->peer && rt->peer->pmtu_expires) ? @@ -2824,7 +2837,7 @@ static int rt_fill_info(struct net *net, } } else #endif - NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); + NLA_PUT_U32(skb, RTA_IIF, rt->rt_iif); } if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 45b8214..c70c42e 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -70,7 +70,12 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, { struct rtable *rt = (struct rtable *)xdst->route; - xdst->u.rt.fl = *fl; + rt->rt_key_dst = fl->fl4_dst; + rt->rt_key_src = fl->fl4_src; + rt->rt_tos = fl->fl4_tos; + rt->rt_iif = fl->iif; + rt->rt_oif = fl->oif; + rt->rt_mark = fl->mark; xdst->u.dst.dev = dev; dev_hold(dev); diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index d580cdf..a9079053 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -143,7 +143,7 @@ static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp, if (head == NULL) goto old_method; - iif = ((struct rtable *)dst)->fl.iif; + iif = ((struct rtable *)dst)->rt_iif; h = route4_fastmap_hash(id, iif); if (id == 
head->fastmap[h].id && diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index e5e1747..a4de67e 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -264,7 +264,7 @@ META_COLLECTOR(int_rtiif) if (unlikely(skb_rtable(skb) == NULL)) *err = -1; else - dst->value = skb_rtable(skb)->fl.iif; + dst->value = skb_rtable(skb)->rt_iif; } /************************************************************************** -- cgit v1.1 From a8e7f4bc38c4a90ee308cd7f1f8604f71db59d05 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Sun, 12 Dec 2010 21:57:10 +0000 Subject: batman-adv: protect neighbor nodes with reference counters Signed-off-by: Marek Lindner --- net/batman-adv/originator.c | 20 +++++++++++++++----- net/batman-adv/originator.h | 8 +++++--- net/batman-adv/routing.c | 7 +++++++ net/batman-adv/types.h | 1 + 4 files changed, 28 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 54863c9..b1b1773 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -59,9 +59,18 @@ err: return 0; } -struct neigh_node * -create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node, - uint8_t *neigh, struct batman_if *if_incoming) +void neigh_node_free_ref(struct kref *refcount) +{ + struct neigh_node *neigh_node; + + neigh_node = container_of(refcount, struct neigh_node, refcount); + kfree(neigh_node); +} + +struct neigh_node *create_neighbor(struct orig_node *orig_node, + struct orig_node *orig_neigh_node, + uint8_t *neigh, + struct batman_if *if_incoming) { struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct neigh_node *neigh_node; @@ -78,6 +87,7 @@ create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node, memcpy(neigh_node->addr, neigh, ETH_ALEN); neigh_node->orig_node = orig_neigh_node; neigh_node->if_incoming = if_incoming; + kref_init(&neigh_node->refcount); list_add_tail(&neigh_node->list, &orig_node->neigh_list); 
return neigh_node; @@ -95,7 +105,7 @@ static void free_orig_node(void *data, void *arg) neigh_node = list_entry(list_pos, struct neigh_node, list); list_del(list_pos); - kfree(neigh_node); + kref_put(&neigh_node->refcount, neigh_node_free_ref); } frag_list_free(&orig_node->frag_list); @@ -216,7 +226,7 @@ static bool purge_orig_neighbors(struct bat_priv *bat_priv, neigh_purged = true; list_del(list_pos); - kfree(neigh_node); + kref_put(&neigh_node->refcount, neigh_node_free_ref); } else { if ((!*best_neigh_node) || (neigh_node->tq_avg > (*best_neigh_node)->tq_avg)) diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 8019fbd..88e5c60 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -26,9 +26,11 @@ int originator_init(struct bat_priv *bat_priv); void originator_free(struct bat_priv *bat_priv); void purge_orig_ref(struct bat_priv *bat_priv); struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr); -struct neigh_node * -create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node, - uint8_t *neigh, struct batman_if *if_incoming); +struct neigh_node *create_neighbor(struct orig_node *orig_node, + struct orig_node *orig_neigh_node, + uint8_t *neigh, + struct batman_if *if_incoming); +void neigh_node_free_ref(struct kref *refcount); int orig_seq_print_text(struct seq_file *seq, void *offset); int orig_hash_add_if(struct batman_if *batman_if, int max_if_num); int orig_hash_del_if(struct batman_if *batman_if, int max_if_num); diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 8274140..36351d3 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -89,6 +89,8 @@ static void update_route(struct bat_priv *bat_priv, struct neigh_node *neigh_node, unsigned char *hna_buff, int hna_buff_len) { + struct neigh_node *neigh_node_tmp; + /* route deleted */ if ((orig_node->router) && (!neigh_node)) { @@ -115,7 +117,12 @@ static void update_route(struct bat_priv 
*bat_priv, orig_node->router->addr); } + if (neigh_node) + kref_get(&neigh_node->refcount); + neigh_node_tmp = orig_node->router; orig_node->router = neigh_node; + if (neigh_node_tmp) + kref_put(&neigh_node_tmp->refcount, neigh_node_free_ref); } diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 7270405..f9217d5 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -115,6 +115,7 @@ struct neigh_node { struct neigh_node *next_bond_candidate; unsigned long last_valid; unsigned long real_bits[NUM_WORDS]; + struct kref refcount; struct orig_node *orig_node; struct batman_if *if_incoming; }; -- cgit v1.1 From 9591a79f280ede740e44aeb8ad93a6692d482dce Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Sun, 12 Dec 2010 21:57:11 +0000 Subject: batman-adv: convert neighbor list to hlist Signed-off-by: Marek Lindner --- net/batman-adv/originator.c | 30 +++++++++++++++--------------- net/batman-adv/routing.c | 29 ++++++++++++++++++----------- net/batman-adv/types.h | 4 ++-- 3 files changed, 35 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index b1b1773..68b04e7 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -82,29 +82,28 @@ struct neigh_node *create_neighbor(struct orig_node *orig_node, if (!neigh_node) return NULL; - INIT_LIST_HEAD(&neigh_node->list); + INIT_HLIST_NODE(&neigh_node->list); memcpy(neigh_node->addr, neigh, ETH_ALEN); neigh_node->orig_node = orig_neigh_node; neigh_node->if_incoming = if_incoming; kref_init(&neigh_node->refcount); - list_add_tail(&neigh_node->list, &orig_node->neigh_list); + hlist_add_head(&neigh_node->list, &orig_node->neigh_list); return neigh_node; } static void free_orig_node(void *data, void *arg) { - struct list_head *list_pos, *list_pos_tmp; + struct hlist_node *node, *node_tmp; struct neigh_node *neigh_node; struct orig_node *orig_node = (struct orig_node *)data; struct bat_priv *bat_priv = (struct 
bat_priv *)arg; /* for all neighbors towards this originator ... */ - list_for_each_safe(list_pos, list_pos_tmp, &orig_node->neigh_list) { - neigh_node = list_entry(list_pos, struct neigh_node, list); - - list_del(list_pos); + hlist_for_each_entry_safe(neigh_node, node, node_tmp, + &orig_node->neigh_list, list) { + hlist_del(&neigh_node->list); kref_put(&neigh_node->refcount, neigh_node_free_ref); } @@ -151,7 +150,7 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr) if (!orig_node) return NULL; - INIT_LIST_HEAD(&orig_node->neigh_list); + INIT_HLIST_HEAD(&orig_node->neigh_list); memcpy(orig_node->orig, addr, ETH_ALEN); orig_node->router = NULL; @@ -195,15 +194,15 @@ static bool purge_orig_neighbors(struct bat_priv *bat_priv, struct orig_node *orig_node, struct neigh_node **best_neigh_node) { - struct list_head *list_pos, *list_pos_tmp; + struct hlist_node *node, *node_tmp; struct neigh_node *neigh_node; bool neigh_purged = false; *best_neigh_node = NULL; /* for all neighbors towards this originator ... 
*/ - list_for_each_safe(list_pos, list_pos_tmp, &orig_node->neigh_list) { - neigh_node = list_entry(list_pos, struct neigh_node, list); + hlist_for_each_entry_safe(neigh_node, node, node_tmp, + &orig_node->neigh_list, list) { if ((time_after(jiffies, neigh_node->last_valid + PURGE_TIMEOUT * HZ)) || @@ -225,7 +224,8 @@ static bool purge_orig_neighbors(struct bat_priv *bat_priv, (neigh_node->last_valid / HZ)); neigh_purged = true; - list_del(list_pos); + + hlist_del(&neigh_node->list); kref_put(&neigh_node->refcount, neigh_node_free_ref); } else { if ((!*best_neigh_node) || @@ -328,7 +328,7 @@ int orig_seq_print_text(struct seq_file *seq, void *offset) struct net_device *net_dev = (struct net_device *)seq->private; struct bat_priv *bat_priv = netdev_priv(net_dev); struct hashtable_t *hash = bat_priv->orig_hash; - struct hlist_node *walk; + struct hlist_node *walk, *node; struct hlist_head *head; struct element_t *bucket; struct orig_node *orig_node; @@ -384,8 +384,8 @@ int orig_seq_print_text(struct seq_file *seq, void *offset) neigh_node->addr, neigh_node->if_incoming->net_dev->name); - list_for_each_entry(neigh_node, &orig_node->neigh_list, - list) { + hlist_for_each_entry(neigh_node, node, + &orig_node->neigh_list, list) { seq_printf(seq, " %pM (%3i)", neigh_node->addr, neigh_node->tq_avg); } diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 36351d3..e8379ba 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -149,12 +149,12 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, { struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; + struct hlist_node *node; unsigned char total_count; if (orig_node == orig_neigh_node) { - list_for_each_entry(tmp_neigh_node, - &orig_node->neigh_list, - list) { + hlist_for_each_entry(tmp_neigh_node, node, + &orig_node->neigh_list, list) { if (compare_orig(tmp_neigh_node->addr, orig_neigh_node->orig) && @@ 
-174,8 +174,8 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, neigh_node->last_valid = jiffies; } else { /* find packet count of corresponding one hop neighbor */ - list_for_each_entry(tmp_neigh_node, - &orig_neigh_node->neigh_list, list) { + hlist_for_each_entry(tmp_neigh_node, node, + &orig_neigh_node->neigh_list, list) { if (compare_orig(tmp_neigh_node->addr, orig_neigh_node->orig) && @@ -260,12 +260,14 @@ static void update_orig(struct bat_priv *bat_priv, char is_duplicate) { struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; + struct hlist_node *node; int tmp_hna_buff_len; bat_dbg(DBG_BATMAN, bat_priv, "update_originator(): " "Searching and updating originator entry of received packet\n"); - list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) { + hlist_for_each_entry(tmp_neigh_node, node, + &orig_node->neigh_list, list) { if (compare_orig(tmp_neigh_node->addr, ethhdr->h_source) && (tmp_neigh_node->if_incoming == if_incoming)) { neigh_node = tmp_neigh_node; @@ -391,6 +393,7 @@ static char count_real_packets(struct ethhdr *ethhdr, struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct orig_node *orig_node; struct neigh_node *tmp_neigh_node; + struct hlist_node *node; char is_duplicate = 0; int32_t seq_diff; int need_update = 0; @@ -407,7 +410,8 @@ static char count_real_packets(struct ethhdr *ethhdr, &orig_node->batman_seqno_reset)) return -1; - list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) { + hlist_for_each_entry(tmp_neigh_node, node, + &orig_node->neigh_list, list) { is_duplicate |= get_bit_status(tmp_neigh_node->real_bits, orig_node->last_real_seqno, @@ -457,6 +461,7 @@ void update_bonding_candidates(struct orig_node *orig_node) int candidates; int interference_candidate; int best_tq; + struct hlist_node *node, *node2; struct neigh_node *tmp_neigh_node, *tmp_neigh_node2; struct neigh_node *first_candidate, *last_candidate; @@ -476,13 +481,15 @@ void 
update_bonding_candidates(struct orig_node *orig_node) * as "bonding partner" */ /* first, zero the list */ - list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) { + hlist_for_each_entry(tmp_neigh_node, node, + &orig_node->neigh_list, list) { tmp_neigh_node->next_bond_candidate = NULL; } first_candidate = NULL; last_candidate = NULL; - list_for_each_entry(tmp_neigh_node, &orig_node->neigh_list, list) { + hlist_for_each_entry(tmp_neigh_node, node, + &orig_node->neigh_list, list) { /* only consider if it has the same primary address ... */ if (memcmp(orig_node->orig, @@ -499,8 +506,8 @@ void update_bonding_candidates(struct orig_node *orig_node) * select this candidate because of possible interference. */ interference_candidate = 0; - list_for_each_entry(tmp_neigh_node2, - &orig_node->neigh_list, list) { + hlist_for_each_entry(tmp_neigh_node2, node2, + &orig_node->neigh_list, list) { if (tmp_neigh_node2 == tmp_neigh_node) continue; diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index f9217d5..779c5c3 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -83,7 +83,7 @@ struct orig_node { uint8_t last_ttl; unsigned long bcast_bits[NUM_WORDS]; uint32_t last_bcast_seqno; - struct list_head neigh_list; + struct hlist_head neigh_list; struct list_head frag_list; unsigned long last_frag_packet; struct { @@ -105,7 +105,7 @@ struct gw_node { * @last_valid: when last packet via this neighbor was received */ struct neigh_node { - struct list_head list; + struct hlist_node list; uint8_t addr[ETH_ALEN]; uint8_t real_packet_count; uint8_t tq_recv[TQ_GLOBAL_WINDOW_SIZE]; -- cgit v1.1 From f987ed6ebd991009cd9f6190ce319e8b50d6be1f Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Sun, 12 Dec 2010 21:57:12 +0000 Subject: batman-adv: protect neighbor list with rcu locks Signed-off-by: Marek Lindner --- net/batman-adv/originator.c | 35 ++++++++++++++++++++++++++++------- net/batman-adv/routing.c | 41 +++++++++++++++++++++++++++-------------- 
net/batman-adv/types.h | 2 ++ 3 files changed, 57 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 68b04e7..6cb9af3 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -67,6 +67,14 @@ void neigh_node_free_ref(struct kref *refcount) kfree(neigh_node); } +static void neigh_node_free_rcu(struct rcu_head *rcu) +{ + struct neigh_node *neigh_node; + + neigh_node = container_of(rcu, struct neigh_node, rcu); + kref_put(&neigh_node->refcount, neigh_node_free_ref); +} + struct neigh_node *create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node, uint8_t *neigh, @@ -89,7 +97,9 @@ struct neigh_node *create_neighbor(struct orig_node *orig_node, neigh_node->if_incoming = if_incoming; kref_init(&neigh_node->refcount); - hlist_add_head(&neigh_node->list, &orig_node->neigh_list); + spin_lock_bh(&orig_node->neigh_list_lock); + hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); + spin_unlock_bh(&orig_node->neigh_list_lock); return neigh_node; } @@ -100,13 +110,17 @@ static void free_orig_node(void *data, void *arg) struct orig_node *orig_node = (struct orig_node *)data; struct bat_priv *bat_priv = (struct bat_priv *)arg; + spin_lock_bh(&orig_node->neigh_list_lock); + /* for all neighbors towards this originator ... 
*/ hlist_for_each_entry_safe(neigh_node, node, node_tmp, &orig_node->neigh_list, list) { - hlist_del(&neigh_node->list); - kref_put(&neigh_node->refcount, neigh_node_free_ref); + hlist_del_rcu(&neigh_node->list); + call_rcu(&neigh_node->rcu, neigh_node_free_rcu); } + spin_unlock_bh(&orig_node->neigh_list_lock); + frag_list_free(&orig_node->frag_list); hna_global_del_orig(bat_priv, orig_node, "originator timed out"); @@ -151,6 +165,7 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr) return NULL; INIT_HLIST_HEAD(&orig_node->neigh_list); + spin_lock_init(&orig_node->neigh_list_lock); memcpy(orig_node->orig, addr, ETH_ALEN); orig_node->router = NULL; @@ -200,6 +215,8 @@ static bool purge_orig_neighbors(struct bat_priv *bat_priv, *best_neigh_node = NULL; + spin_lock_bh(&orig_node->neigh_list_lock); + /* for all neighbors towards this originator ... */ hlist_for_each_entry_safe(neigh_node, node, node_tmp, &orig_node->neigh_list, list) { @@ -225,14 +242,16 @@ static bool purge_orig_neighbors(struct bat_priv *bat_priv, neigh_purged = true; - hlist_del(&neigh_node->list); - kref_put(&neigh_node->refcount, neigh_node_free_ref); + hlist_del_rcu(&neigh_node->list); + call_rcu(&neigh_node->rcu, neigh_node_free_rcu); } else { if ((!*best_neigh_node) || (neigh_node->tq_avg > (*best_neigh_node)->tq_avg)) *best_neigh_node = neigh_node; } } + + spin_unlock_bh(&orig_node->neigh_list_lock); return neigh_purged; } @@ -384,11 +403,13 @@ int orig_seq_print_text(struct seq_file *seq, void *offset) neigh_node->addr, neigh_node->if_incoming->net_dev->name); - hlist_for_each_entry(neigh_node, node, - &orig_node->neigh_list, list) { + rcu_read_lock(); + hlist_for_each_entry_rcu(neigh_node, node, + &orig_node->neigh_list, list) { seq_printf(seq, " %pM (%3i)", neigh_node->addr, neigh_node->tq_avg); } + rcu_read_unlock(); seq_printf(seq, "\n"); batman_count++; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index e8379ba..97f3216 100644 --- 
a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -153,14 +153,16 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, unsigned char total_count; if (orig_node == orig_neigh_node) { - hlist_for_each_entry(tmp_neigh_node, node, - &orig_node->neigh_list, list) { + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp_neigh_node, node, + &orig_node->neigh_list, list) { if (compare_orig(tmp_neigh_node->addr, orig_neigh_node->orig) && (tmp_neigh_node->if_incoming == if_incoming)) neigh_node = tmp_neigh_node; } + rcu_read_unlock(); if (!neigh_node) neigh_node = create_neighbor(orig_node, @@ -174,14 +176,16 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, neigh_node->last_valid = jiffies; } else { /* find packet count of corresponding one hop neighbor */ - hlist_for_each_entry(tmp_neigh_node, node, - &orig_neigh_node->neigh_list, list) { + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp_neigh_node, node, + &orig_neigh_node->neigh_list, list) { if (compare_orig(tmp_neigh_node->addr, orig_neigh_node->orig) && (tmp_neigh_node->if_incoming == if_incoming)) neigh_node = tmp_neigh_node; } + rcu_read_unlock(); if (!neigh_node) neigh_node = create_neighbor(orig_neigh_node, @@ -266,8 +270,9 @@ static void update_orig(struct bat_priv *bat_priv, bat_dbg(DBG_BATMAN, bat_priv, "update_originator(): " "Searching and updating originator entry of received packet\n"); - hlist_for_each_entry(tmp_neigh_node, node, - &orig_node->neigh_list, list) { + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp_neigh_node, node, + &orig_node->neigh_list, list) { if (compare_orig(tmp_neigh_node->addr, ethhdr->h_source) && (tmp_neigh_node->if_incoming == if_incoming)) { neigh_node = tmp_neigh_node; @@ -282,6 +287,7 @@ static void update_orig(struct bat_priv *bat_priv, tmp_neigh_node->tq_avg = ring_buffer_avg(tmp_neigh_node->tq_recv); } + rcu_read_unlock(); if (!neigh_node) { struct orig_node *orig_tmp; @@ -410,8 +416,9 @@ static char count_real_packets(struct ethhdr 
*ethhdr, &orig_node->batman_seqno_reset)) return -1; - hlist_for_each_entry(tmp_neigh_node, node, - &orig_node->neigh_list, list) { + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp_neigh_node, node, + &orig_node->neigh_list, list) { is_duplicate |= get_bit_status(tmp_neigh_node->real_bits, orig_node->last_real_seqno, @@ -431,6 +438,7 @@ static char count_real_packets(struct ethhdr *ethhdr, tmp_neigh_node->real_packet_count = bit_packet_count(tmp_neigh_node->real_bits); } + rcu_read_unlock(); if (need_update) { bat_dbg(DBG_BATMAN, bat_priv, @@ -481,15 +489,19 @@ void update_bonding_candidates(struct orig_node *orig_node) * as "bonding partner" */ /* first, zero the list */ - hlist_for_each_entry(tmp_neigh_node, node, - &orig_node->neigh_list, list) { + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp_neigh_node, node, + &orig_node->neigh_list, list) { tmp_neigh_node->next_bond_candidate = NULL; } + rcu_read_unlock(); first_candidate = NULL; last_candidate = NULL; - hlist_for_each_entry(tmp_neigh_node, node, - &orig_node->neigh_list, list) { + + rcu_read_lock(); + hlist_for_each_entry_rcu(tmp_neigh_node, node, + &orig_node->neigh_list, list) { /* only consider if it has the same primary address ... */ if (memcmp(orig_node->orig, @@ -506,8 +518,8 @@ void update_bonding_candidates(struct orig_node *orig_node) * select this candidate because of possible interference. 
*/ interference_candidate = 0; - hlist_for_each_entry(tmp_neigh_node2, node2, - &orig_node->neigh_list, list) { + hlist_for_each_entry_rcu(tmp_neigh_node2, node2, + &orig_node->neigh_list, list) { if (tmp_neigh_node2 == tmp_neigh_node) continue; @@ -541,6 +553,7 @@ void update_bonding_candidates(struct orig_node *orig_node) candidates++; } + rcu_read_unlock(); if (candidates > 0) { first_candidate->next_bond_candidate = last_candidate; diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 779c5c3..d4fa727 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -85,6 +85,7 @@ struct orig_node { uint32_t last_bcast_seqno; struct hlist_head neigh_list; struct list_head frag_list; + spinlock_t neigh_list_lock; /* protects neighbor list */ unsigned long last_frag_packet; struct { uint8_t candidates; @@ -116,6 +117,7 @@ struct neigh_node { unsigned long last_valid; unsigned long real_bits[NUM_WORDS]; struct kref refcount; + struct rcu_head rcu; struct orig_node *orig_node; struct batman_if *if_incoming; }; -- cgit v1.1 From 1a241a57be46cda985c7c36e24d49f67de6bfb53 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Wed, 19 Jan 2011 19:16:10 +0000 Subject: batman-adv: free neighbors when an interface is deactivated hardif_disable_interface() calls purge_orig_ref() to immediately free all neighbors associated with the interface that is going down. purge_orig_neighbors() checked if the interface status is IF_INACTIVE which is set to IF_NOT_IN_USE shortly before calling purge_orig_ref(). 
Signed-off-by: Marek Lindner --- net/batman-adv/originator.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 6cb9af3..899d494 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -224,10 +224,15 @@ static bool purge_orig_neighbors(struct bat_priv *bat_priv, if ((time_after(jiffies, neigh_node->last_valid + PURGE_TIMEOUT * HZ)) || (neigh_node->if_incoming->if_status == IF_INACTIVE) || + (neigh_node->if_incoming->if_status == IF_NOT_IN_USE) || (neigh_node->if_incoming->if_status == IF_TO_BE_REMOVED)) { - if (neigh_node->if_incoming->if_status == - IF_TO_BE_REMOVED) + if ((neigh_node->if_incoming->if_status == + IF_INACTIVE) || + (neigh_node->if_incoming->if_status == + IF_NOT_IN_USE) || + (neigh_node->if_incoming->if_status == + IF_TO_BE_REMOVED)) bat_dbg(DBG_BATMAN, bat_priv, "neighbor purge: originator %pM, " "neighbor: %pM, iface: %s\n", -- cgit v1.1 From a775eb847ae66211577d4fd2c46749b77c9993c9 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Wed, 19 Jan 2011 20:01:39 +0000 Subject: batman-adv: protect neigh_nodes used outside of rcu_locks with refcounting Signed-off-by: Marek Lindner --- net/batman-adv/routing.c | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 97f3216..c15e6c1 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -151,6 +151,7 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; struct hlist_node *node; unsigned char total_count; + int ret = 0; if (orig_node == orig_neigh_node) { rcu_read_lock(); @@ -162,7 +163,6 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, (tmp_neigh_node->if_incoming == if_incoming)) neigh_node = tmp_neigh_node; } - rcu_read_unlock(); if 
(!neigh_node) neigh_node = create_neighbor(orig_node, @@ -171,7 +171,10 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, if_incoming); /* create_neighbor failed, return 0 */ if (!neigh_node) - return 0; + goto unlock; + + kref_get(&neigh_node->refcount); + rcu_read_unlock(); neigh_node->last_valid = jiffies; } else { @@ -185,7 +188,6 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, (tmp_neigh_node->if_incoming == if_incoming)) neigh_node = tmp_neigh_node; } - rcu_read_unlock(); if (!neigh_node) neigh_node = create_neighbor(orig_neigh_node, @@ -194,7 +196,10 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, if_incoming); /* create_neighbor failed, return 0 */ if (!neigh_node) - return 0; + goto unlock; + + kref_get(&neigh_node->refcount); + rcu_read_unlock(); } orig_node->last_valid = jiffies; @@ -250,9 +255,16 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, /* if link has the minimum required transmission quality * consider it bidirectional */ if (batman_packet->tq >= TQ_TOTAL_BIDRECT_LIMIT) - return 1; + ret = 1; - return 0; + goto out; + +unlock: + rcu_read_unlock(); +out: + if (neigh_node) + kref_put(&neigh_node->refcount, neigh_node_free_ref); + return ret; } static void update_orig(struct bat_priv *bat_priv, @@ -287,23 +299,25 @@ static void update_orig(struct bat_priv *bat_priv, tmp_neigh_node->tq_avg = ring_buffer_avg(tmp_neigh_node->tq_recv); } - rcu_read_unlock(); if (!neigh_node) { struct orig_node *orig_tmp; orig_tmp = get_orig_node(bat_priv, ethhdr->h_source); if (!orig_tmp) - return; + goto unlock; neigh_node = create_neighbor(orig_node, orig_tmp, ethhdr->h_source, if_incoming); if (!neigh_node) - return; + goto unlock; } else bat_dbg(DBG_BATMAN, bat_priv, "Updating existing last-hop neighbor of originator\n"); + kref_get(&neigh_node->refcount); + rcu_read_unlock(); + orig_node->flags = batman_packet->flags; neigh_node->last_valid = jiffies; @@ -357,6 +371,14 @@ update_gw: 
(atomic_read(&bat_priv->gw_mode) == GW_MODE_CLIENT) && (atomic_read(&bat_priv->gw_sel_class) > 2)) gw_check_election(bat_priv, orig_node); + + goto out; + +unlock: + rcu_read_unlock(); +out: + if (neigh_node) + kref_put(&neigh_node->refcount, neigh_node_free_ref); } /* checks whether the host restarted and is in the protection time. -- cgit v1.1 From fb778ea173fcd58b8fc3d75c674f07fab187b55f Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Wed, 19 Jan 2011 20:01:40 +0000 Subject: batman-adv: protect each hash row with rcu locks Signed-off-by: Marek Lindner --- net/batman-adv/hash.c | 34 +++++++++++++----- net/batman-adv/hash.h | 73 +++++++++++++++++++++++++------------- net/batman-adv/icmp_socket.c | 2 ++ net/batman-adv/originator.c | 27 ++++++++++---- net/batman-adv/routing.c | 16 ++++++++- net/batman-adv/translation-table.c | 14 ++++++++ net/batman-adv/unicast.c | 2 ++ net/batman-adv/vis.c | 18 +++++++--- 8 files changed, 141 insertions(+), 45 deletions(-) (limited to 'net') diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c index fa26939..0265366 100644 --- a/net/batman-adv/hash.c +++ b/net/batman-adv/hash.c @@ -27,13 +27,16 @@ static void hash_init(struct hashtable_t *hash) { int i; - for (i = 0 ; i < hash->size; i++) + for (i = 0 ; i < hash->size; i++) { INIT_HLIST_HEAD(&hash->table[i]); + spin_lock_init(&hash->list_locks[i]); + } } /* free only the hashtable and the hash itself. 
*/ void hash_destroy(struct hashtable_t *hash) { + kfree(hash->list_locks); kfree(hash->table); kfree(hash); } @@ -43,20 +46,33 @@ struct hashtable_t *hash_new(int size) { struct hashtable_t *hash; - hash = kmalloc(sizeof(struct hashtable_t) , GFP_ATOMIC); - + hash = kmalloc(sizeof(struct hashtable_t), GFP_ATOMIC); if (!hash) return NULL; - hash->size = size; hash->table = kmalloc(sizeof(struct element_t *) * size, GFP_ATOMIC); + if (!hash->table) + goto free_hash; - if (!hash->table) { - kfree(hash); - return NULL; - } + hash->list_locks = kmalloc(sizeof(spinlock_t) * size, GFP_ATOMIC); + if (!hash->list_locks) + goto free_table; + hash->size = size; hash_init(hash); - return hash; + +free_table: + kfree(hash->table); +free_hash: + kfree(hash); + return NULL; +} + +void bucket_free_rcu(struct rcu_head *rcu) +{ + struct element_t *bucket; + + bucket = container_of(rcu, struct element_t, rcu); + kfree(bucket); } diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h index eae2440..3c48c6b 100644 --- a/net/batman-adv/hash.h +++ b/net/batman-adv/hash.h @@ -39,10 +39,12 @@ typedef void (*hashdata_free_cb)(void *, void *); struct element_t { void *data; /* pointer to the data */ struct hlist_node hlist; /* bucket list pointer */ + struct rcu_head rcu; }; struct hashtable_t { - struct hlist_head *table; /* the hashtable itself, with the buckets */ + struct hlist_head *table; /* the hashtable itself with the buckets */ + spinlock_t *list_locks; /* spinlock for each hash list entry */ int size; /* size of hashtable */ }; @@ -52,6 +54,8 @@ struct hashtable_t *hash_new(int size); /* free only the hashtable and the hash itself. */ void hash_destroy(struct hashtable_t *hash); +void bucket_free_rcu(struct rcu_head *rcu); + /* remove the hash structure. if hashdata_free_cb != NULL, this function will be * called to remove the elements inside of the hash. if you don't remove the * elements, memory might be leaked. 
*/ @@ -61,19 +65,22 @@ static inline void hash_delete(struct hashtable_t *hash, struct hlist_head *head; struct hlist_node *walk, *safe; struct element_t *bucket; + spinlock_t *list_lock; /* spinlock to protect write access */ int i; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; + list_lock = &hash->list_locks[i]; - hlist_for_each_safe(walk, safe, head) { - bucket = hlist_entry(walk, struct element_t, hlist); + spin_lock_bh(list_lock); + hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) { if (free_cb) free_cb(bucket->data, arg); - hlist_del(walk); - kfree(bucket); + hlist_del_rcu(walk); + call_rcu(&bucket->rcu, bucket_free_rcu); } + spin_unlock_bh(list_lock); } hash_destroy(hash); @@ -88,29 +95,39 @@ static inline int hash_add(struct hashtable_t *hash, struct hlist_head *head; struct hlist_node *walk, *safe; struct element_t *bucket; + spinlock_t *list_lock; /* spinlock to protect write access */ if (!hash) - return -1; + goto err; index = choose(data, hash->size); head = &hash->table[index]; + list_lock = &hash->list_locks[index]; - hlist_for_each_safe(walk, safe, head) { - bucket = hlist_entry(walk, struct element_t, hlist); + rcu_read_lock(); + hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) { if (compare(bucket->data, data)) - return -1; + goto err_unlock; } + rcu_read_unlock(); /* no duplicate found in list, add new element */ bucket = kmalloc(sizeof(struct element_t), GFP_ATOMIC); - if (!bucket) - return -1; + goto err; bucket->data = data; - hlist_add_head(&bucket->hlist, head); + + spin_lock_bh(list_lock); + hlist_add_head_rcu(&bucket->hlist, head); + spin_unlock_bh(list_lock); return 0; + +err_unlock: + rcu_read_unlock(); +err: + return -1; } /* removes data from hash, if found. 
returns pointer do data on success, so you @@ -125,25 +142,31 @@ static inline void *hash_remove(struct hashtable_t *hash, struct hlist_node *walk; struct element_t *bucket; struct hlist_head *head; - void *data_save; + void *data_save = NULL; index = choose(data, hash->size); head = &hash->table[index]; + spin_lock_bh(&hash->list_locks[index]); hlist_for_each_entry(bucket, walk, head, hlist) { if (compare(bucket->data, data)) { data_save = bucket->data; - hlist_del(walk); - kfree(bucket); - return data_save; + hlist_del_rcu(walk); + call_rcu(&bucket->rcu, bucket_free_rcu); + break; } } + spin_unlock_bh(&hash->list_locks[index]); - return NULL; + return data_save; } -/* finds data, based on the key in keydata. returns the found data on success, - * or NULL on error */ +/** + * finds data, based on the key in keydata. returns the found data on success, + * or NULL on error + * + * caller must lock with rcu_read_lock() / rcu_read_unlock() + **/ static inline void *hash_find(struct hashtable_t *hash, hashdata_compare_cb compare, hashdata_choose_cb choose, void *keydata) @@ -152,6 +175,7 @@ static inline void *hash_find(struct hashtable_t *hash, struct hlist_head *head; struct hlist_node *walk; struct element_t *bucket; + void *bucket_data = NULL; if (!hash) return NULL; @@ -159,13 +183,14 @@ static inline void *hash_find(struct hashtable_t *hash, index = choose(keydata , hash->size); head = &hash->table[index]; - hlist_for_each(walk, head) { - bucket = hlist_entry(walk, struct element_t, hlist); - if (compare(bucket->data, keydata)) - return bucket->data; + hlist_for_each_entry(bucket, walk, head, hlist) { + if (compare(bucket->data, keydata)) { + bucket_data = bucket->data; + break; + } } - return NULL; + return bucket_data; } #endif /* _NET_BATMAN_ADV_HASH_H_ */ diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 319a7cc..8e0cd8a 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -220,9 +220,11 @@ static 
ssize_t bat_socket_write(struct file *file, const char __user *buff, goto dst_unreach; spin_lock_bh(&bat_priv->orig_hash_lock); + rcu_read_lock(); orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, compare_orig, choose_orig, icmp_packet->dst)); + rcu_read_unlock(); if (!orig_node) goto unlock; diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 899d494..5c32314 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -150,9 +150,11 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr) int size; int hash_added; + rcu_read_lock(); orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, compare_orig, choose_orig, addr)); + rcu_read_unlock(); if (orig_node) return orig_node; @@ -294,6 +296,7 @@ static void _purge_orig(struct bat_priv *bat_priv) struct hlist_node *walk, *safe; struct hlist_head *head; struct element_t *bucket; + spinlock_t *list_lock; /* spinlock to protect write access */ struct orig_node *orig_node; int i; @@ -305,22 +308,26 @@ static void _purge_orig(struct bat_priv *bat_priv) /* for all origins... 
*/ for (i = 0; i < hash->size; i++) { head = &hash->table[i]; + list_lock = &hash->list_locks[i]; + spin_lock_bh(list_lock); hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) { orig_node = bucket->data; if (purge_orig_node(bat_priv, orig_node)) { if (orig_node->gw_flags) gw_node_delete(bat_priv, orig_node); - hlist_del(walk); - kfree(bucket); + hlist_del_rcu(walk); + call_rcu(&bucket->rcu, bucket_free_rcu); free_orig_node(orig_node, bat_priv); + continue; } if (time_after(jiffies, orig_node->last_frag_packet + msecs_to_jiffies(FRAG_TIMEOUT))) frag_list_free(&orig_node->frag_list); } + spin_unlock_bh(list_lock); } spin_unlock_bh(&bat_priv->orig_hash_lock); @@ -387,7 +394,8 @@ int orig_seq_print_text(struct seq_file *seq, void *offset) for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(bucket, walk, head, hlist) { + rcu_read_lock(); + hlist_for_each_entry_rcu(bucket, walk, head, hlist) { orig_node = bucket->data; if (!orig_node->router) @@ -408,17 +416,16 @@ int orig_seq_print_text(struct seq_file *seq, void *offset) neigh_node->addr, neigh_node->if_incoming->net_dev->name); - rcu_read_lock(); hlist_for_each_entry_rcu(neigh_node, node, &orig_node->neigh_list, list) { seq_printf(seq, " %pM (%3i)", neigh_node->addr, neigh_node->tq_avg); } - rcu_read_unlock(); seq_printf(seq, "\n"); batman_count++; } + rcu_read_unlock(); } spin_unlock_bh(&bat_priv->orig_hash_lock); @@ -476,18 +483,21 @@ int orig_hash_add_if(struct batman_if *batman_if, int max_if_num) for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(bucket, walk, head, hlist) { + rcu_read_lock(); + hlist_for_each_entry_rcu(bucket, walk, head, hlist) { orig_node = bucket->data; if (orig_node_add_if(orig_node, max_if_num) == -1) goto err; } + rcu_read_unlock(); } spin_unlock_bh(&bat_priv->orig_hash_lock); return 0; err: + rcu_read_unlock(); spin_unlock_bh(&bat_priv->orig_hash_lock); return -ENOMEM; } @@ -562,7 +572,8 @@ int orig_hash_del_if(struct 
batman_if *batman_if, int max_if_num) for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(bucket, walk, head, hlist) { + rcu_read_lock(); + hlist_for_each_entry_rcu(bucket, walk, head, hlist) { orig_node = bucket->data; ret = orig_node_del_if(orig_node, max_if_num, @@ -571,6 +582,7 @@ int orig_hash_del_if(struct batman_if *batman_if, int max_if_num) if (ret == -1) goto err; } + rcu_read_unlock(); } /* renumber remaining batman interfaces _inside_ of orig_hash_lock */ @@ -595,6 +607,7 @@ int orig_hash_del_if(struct batman_if *batman_if, int max_if_num) return 0; err: + rcu_read_unlock(); spin_unlock_bh(&bat_priv->orig_hash_lock); return -ENOMEM; } diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index c15e6c1..32ae04e 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -52,7 +52,8 @@ void slide_own_bcast_window(struct batman_if *batman_if) for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(bucket, walk, head, hlist) { + rcu_read_lock(); + hlist_for_each_entry_rcu(bucket, walk, head, hlist) { orig_node = bucket->data; word_index = batman_if->if_num * NUM_WORDS; word = &(orig_node->bcast_own[word_index]); @@ -61,6 +62,7 @@ void slide_own_bcast_window(struct batman_if *batman_if) orig_node->bcast_own_sum[batman_if->if_num] = bit_packet_count(word); } + rcu_read_unlock(); } spin_unlock_bh(&bat_priv->orig_hash_lock); @@ -873,9 +875,11 @@ static int recv_my_icmp_packet(struct bat_priv *bat_priv, /* answer echo request (ping) */ /* get routing information */ spin_lock_bh(&bat_priv->orig_hash_lock); + rcu_read_lock(); orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, compare_orig, choose_orig, icmp_packet->orig)); + rcu_read_unlock(); ret = NET_RX_DROP; if ((orig_node) && (orig_node->router)) { @@ -931,9 +935,11 @@ static int recv_icmp_ttl_exceeded(struct bat_priv *bat_priv, /* get routing information */ spin_lock_bh(&bat_priv->orig_hash_lock); + rcu_read_lock(); 
orig_node = ((struct orig_node *) hash_find(bat_priv->orig_hash, compare_orig, choose_orig, icmp_packet->orig)); + rcu_read_unlock(); ret = NET_RX_DROP; if ((orig_node) && (orig_node->router)) { @@ -1023,9 +1029,11 @@ int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if) /* get routing information */ spin_lock_bh(&bat_priv->orig_hash_lock); + rcu_read_lock(); orig_node = ((struct orig_node *) hash_find(bat_priv->orig_hash, compare_orig, choose_orig, icmp_packet->dst)); + rcu_read_unlock(); if ((orig_node) && (orig_node->router)) { @@ -1094,9 +1102,11 @@ struct neigh_node *find_router(struct bat_priv *bat_priv, router_orig->orig, ETH_ALEN) == 0) { primary_orig_node = router_orig; } else { + rcu_read_lock(); primary_orig_node = hash_find(bat_priv->orig_hash, compare_orig, choose_orig, router_orig->primary_addr); + rcu_read_unlock(); if (!primary_orig_node) return orig_node->router; @@ -1199,9 +1209,11 @@ int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, /* get routing information */ spin_lock_bh(&bat_priv->orig_hash_lock); + rcu_read_lock(); orig_node = ((struct orig_node *) hash_find(bat_priv->orig_hash, compare_orig, choose_orig, unicast_packet->dest)); + rcu_read_unlock(); router = find_router(bat_priv, orig_node, recv_if); @@ -1345,9 +1357,11 @@ int recv_bcast_packet(struct sk_buff *skb, struct batman_if *recv_if) return NET_RX_DROP; spin_lock_bh(&bat_priv->orig_hash_lock); + rcu_read_lock(); orig_node = ((struct orig_node *) hash_find(bat_priv->orig_hash, compare_orig, choose_orig, bcast_packet->orig)); + rcu_read_unlock(); if (!orig_node) { spin_unlock_bh(&bat_priv->orig_hash_lock); diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 7fb6726..b25e4b3 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -60,10 +60,12 @@ void hna_local_add(struct net_device *soft_iface, uint8_t *addr) int required_bytes; spin_lock_bh(&bat_priv->hna_lhash_lock); + 
rcu_read_lock(); hna_local_entry = ((struct hna_local_entry *)hash_find(bat_priv->hna_local_hash, compare_orig, choose_orig, addr)); + rcu_read_unlock(); spin_unlock_bh(&bat_priv->hna_lhash_lock); if (hna_local_entry) { @@ -116,9 +118,11 @@ void hna_local_add(struct net_device *soft_iface, uint8_t *addr) /* remove address from global hash if present */ spin_lock_bh(&bat_priv->hna_ghash_lock); + rcu_read_lock(); hna_global_entry = ((struct hna_global_entry *) hash_find(bat_priv->hna_global_hash, compare_orig, choose_orig, addr)); + rcu_read_unlock(); if (hna_global_entry) _hna_global_del_orig(bat_priv, hna_global_entry, @@ -252,9 +256,11 @@ void hna_local_remove(struct bat_priv *bat_priv, spin_lock_bh(&bat_priv->hna_lhash_lock); + rcu_read_lock(); hna_local_entry = (struct hna_local_entry *) hash_find(bat_priv->hna_local_hash, compare_orig, choose_orig, addr); + rcu_read_unlock(); if (hna_local_entry) hna_local_del(bat_priv, hna_local_entry, message); @@ -334,9 +340,11 @@ void hna_global_add_orig(struct bat_priv *bat_priv, spin_lock_bh(&bat_priv->hna_ghash_lock); hna_ptr = hna_buff + (hna_buff_count * ETH_ALEN); + rcu_read_lock(); hna_global_entry = (struct hna_global_entry *) hash_find(bat_priv->hna_global_hash, compare_orig, choose_orig, hna_ptr); + rcu_read_unlock(); if (!hna_global_entry) { spin_unlock_bh(&bat_priv->hna_ghash_lock); @@ -368,9 +376,11 @@ void hna_global_add_orig(struct bat_priv *bat_priv, spin_lock_bh(&bat_priv->hna_lhash_lock); hna_ptr = hna_buff + (hna_buff_count * ETH_ALEN); + rcu_read_lock(); hna_local_entry = (struct hna_local_entry *) hash_find(bat_priv->hna_local_hash, compare_orig, choose_orig, hna_ptr); + rcu_read_unlock(); if (hna_local_entry) hna_local_del(bat_priv, hna_local_entry, @@ -483,9 +493,11 @@ void hna_global_del_orig(struct bat_priv *bat_priv, while ((hna_buff_count + 1) * ETH_ALEN <= orig_node->hna_buff_len) { hna_ptr = orig_node->hna_buff + (hna_buff_count * ETH_ALEN); + rcu_read_lock(); hna_global_entry = (struct 
hna_global_entry *) hash_find(bat_priv->hna_global_hash, compare_orig, choose_orig, hna_ptr); + rcu_read_unlock(); if ((hna_global_entry) && (hna_global_entry->orig_node == orig_node)) @@ -521,9 +533,11 @@ struct orig_node *transtable_search(struct bat_priv *bat_priv, uint8_t *addr) struct hna_global_entry *hna_global_entry; spin_lock_bh(&bat_priv->hna_ghash_lock); + rcu_read_lock(); hna_global_entry = (struct hna_global_entry *) hash_find(bat_priv->hna_global_hash, compare_orig, choose_orig, addr); + rcu_read_unlock(); spin_unlock_bh(&bat_priv->hna_ghash_lock); if (!hna_global_entry) diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 1b5e761..4687027 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -179,9 +179,11 @@ int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv, *new_skb = NULL; spin_lock_bh(&bat_priv->orig_hash_lock); + rcu_read_lock(); orig_node = ((struct orig_node *) hash_find(bat_priv->orig_hash, compare_orig, choose_orig, unicast_packet->orig)); + rcu_read_unlock(); if (!orig_node) { pr_debug("couldn't find originator in orig_hash\n"); diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index a77b773..8092ead 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -380,8 +380,10 @@ static struct vis_info *add_packet(struct bat_priv *bat_priv, sizeof(struct vis_packet)); memcpy(search_packet->vis_orig, vis_packet->vis_orig, ETH_ALEN); + rcu_read_lock(); old_info = hash_find(bat_priv->vis_hash, vis_info_cmp, vis_info_choose, &search_elem); + rcu_read_unlock(); kfree_skb(search_elem.skb_packet); if (old_info) { @@ -540,7 +542,8 @@ static int find_best_vis_server(struct bat_priv *bat_priv, for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(bucket, walk, head, hlist) { + rcu_read_lock(); + hlist_for_each_entry_rcu(bucket, walk, head, hlist) { orig_node = bucket->data; if ((orig_node) && (orig_node->router) && (orig_node->flags & VIS_SERVER) && @@ -550,6 
+553,7 @@ static int find_best_vis_server(struct bat_priv *bat_priv, ETH_ALEN); } } + rcu_read_unlock(); } return best_tq; @@ -605,7 +609,8 @@ static int generate_vis_packet(struct bat_priv *bat_priv) for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(bucket, walk, head, hlist) { + rcu_read_lock(); + hlist_for_each_entry_rcu(bucket, walk, head, hlist) { orig_node = bucket->data; neigh_node = orig_node->router; @@ -632,10 +637,12 @@ static int generate_vis_packet(struct bat_priv *bat_priv) packet->entries++; if (vis_packet_full(info)) { + rcu_read_unlock(); spin_unlock_bh(&bat_priv->orig_hash_lock); return 0; } } + rcu_read_unlock(); } spin_unlock_bh(&bat_priv->orig_hash_lock); @@ -721,7 +728,8 @@ static void broadcast_vis_packet(struct bat_priv *bat_priv, for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(bucket, walk, head, hlist) { + rcu_read_lock(); + hlist_for_each_entry_rcu(bucket, walk, head, hlist) { orig_node = bucket->data; /* if it's a vis server and reachable, send it. 
*/ @@ -746,7 +754,7 @@ static void broadcast_vis_packet(struct bat_priv *bat_priv, spin_lock_bh(&bat_priv->orig_hash_lock); } - + rcu_read_unlock(); } spin_unlock_bh(&bat_priv->orig_hash_lock); @@ -763,9 +771,11 @@ static void unicast_vis_packet(struct bat_priv *bat_priv, spin_lock_bh(&bat_priv->orig_hash_lock); packet = (struct vis_packet *)info->skb_packet->data; + rcu_read_lock(); orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, compare_orig, choose_orig, packet->target_orig)); + rcu_read_unlock(); if ((!orig_node) || (!orig_node->router)) goto out; -- cgit v1.1 From 16b1aba849eeb45d51a5de731cf103143439ffe1 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Wed, 19 Jan 2011 20:01:42 +0000 Subject: batman-adv: protect originator nodes with reference counters Signed-off-by: Marek Lindner --- net/batman-adv/originator.c | 61 ++++++++++++++++++++++++++++++++++++++------- net/batman-adv/originator.h | 1 + net/batman-adv/routing.c | 33 ++++++++++++++++-------- net/batman-adv/types.h | 2 ++ 4 files changed, 78 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 5c32314..fcdb0b7 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -103,12 +103,13 @@ struct neigh_node *create_neighbor(struct orig_node *orig_node, return neigh_node; } -static void free_orig_node(void *data, void *arg) +void orig_node_free_ref(struct kref *refcount) { struct hlist_node *node, *node_tmp; struct neigh_node *neigh_node; - struct orig_node *orig_node = (struct orig_node *)data; - struct bat_priv *bat_priv = (struct bat_priv *)arg; + struct orig_node *orig_node; + + orig_node = container_of(refcount, struct orig_node, refcount); spin_lock_bh(&orig_node->neigh_list_lock); @@ -122,7 +123,8 @@ static void free_orig_node(void *data, void *arg) spin_unlock_bh(&orig_node->neigh_list_lock); frag_list_free(&orig_node->frag_list); - hna_global_del_orig(bat_priv, orig_node, "originator timed 
out"); + hna_global_del_orig(orig_node->bat_priv, orig_node, + "originator timed out"); kfree(orig_node->bcast_own); kfree(orig_node->bcast_own_sum); @@ -131,17 +133,53 @@ static void free_orig_node(void *data, void *arg) void originator_free(struct bat_priv *bat_priv) { - if (!bat_priv->orig_hash) + struct hashtable_t *hash = bat_priv->orig_hash; + struct hlist_node *walk, *safe; + struct hlist_head *head; + struct element_t *bucket; + spinlock_t *list_lock; /* spinlock to protect write access */ + struct orig_node *orig_node; + int i; + + if (!hash) return; cancel_delayed_work_sync(&bat_priv->orig_work); spin_lock_bh(&bat_priv->orig_hash_lock); - hash_delete(bat_priv->orig_hash, free_orig_node, bat_priv); bat_priv->orig_hash = NULL; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + list_lock = &hash->list_locks[i]; + + spin_lock_bh(list_lock); + hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) { + orig_node = bucket->data; + + hlist_del_rcu(walk); + call_rcu(&bucket->rcu, bucket_free_rcu); + kref_put(&orig_node->refcount, orig_node_free_ref); + } + spin_unlock_bh(list_lock); + } + + hash_destroy(hash); spin_unlock_bh(&bat_priv->orig_hash_lock); } +static void bucket_free_orig_rcu(struct rcu_head *rcu) +{ + struct element_t *bucket; + struct orig_node *orig_node; + + bucket = container_of(rcu, struct element_t, rcu); + orig_node = bucket->data; + + kref_put(&orig_node->refcount, orig_node_free_ref); + kfree(bucket); +} + /* this function finds or creates an originator entry for the given * address if it does not exits */ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr) @@ -156,8 +194,10 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr) addr)); rcu_read_unlock(); - if (orig_node) + if (orig_node) { + kref_get(&orig_node->refcount); return orig_node; + } bat_dbg(DBG_BATMAN, bat_priv, "Creating new originator: %pM\n", addr); @@ -168,7 +208,9 @@ struct orig_node *get_orig_node(struct 
bat_priv *bat_priv, uint8_t *addr) INIT_HLIST_HEAD(&orig_node->neigh_list); spin_lock_init(&orig_node->neigh_list_lock); + kref_init(&orig_node->refcount); + orig_node->bat_priv = bat_priv; memcpy(orig_node->orig, addr, ETH_ALEN); orig_node->router = NULL; orig_node->hna_buff = NULL; @@ -197,6 +239,8 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr) if (hash_added < 0) goto free_bcast_own_sum; + /* extra reference for return */ + kref_get(&orig_node->refcount); return orig_node; free_bcast_own_sum: kfree(orig_node->bcast_own_sum); @@ -318,8 +362,7 @@ static void _purge_orig(struct bat_priv *bat_priv) if (orig_node->gw_flags) gw_node_delete(bat_priv, orig_node); hlist_del_rcu(walk); - call_rcu(&bucket->rcu, bucket_free_rcu); - free_orig_node(orig_node, bat_priv); + call_rcu(&bucket->rcu, bucket_free_orig_rcu); continue; } diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 88e5c60..edc64dc 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -25,6 +25,7 @@ int originator_init(struct bat_priv *bat_priv); void originator_free(struct bat_priv *bat_priv); void purge_orig_ref(struct bat_priv *bat_priv); +void orig_node_free_ref(struct kref *refcount); struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr); struct neigh_node *create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node, diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 32ae04e..1c31a0e 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -311,6 +311,8 @@ static void update_orig(struct bat_priv *bat_priv, neigh_node = create_neighbor(orig_node, orig_tmp, ethhdr->h_source, if_incoming); + + kref_put(&orig_tmp->refcount, orig_node_free_ref); if (!neigh_node) goto unlock; } else @@ -438,7 +440,7 @@ static char count_real_packets(struct ethhdr *ethhdr, /* signalize caller that the packet is to be dropped. 
*/ if (window_protected(bat_priv, seq_diff, &orig_node->batman_seqno_reset)) - return -1; + goto err; rcu_read_lock(); hlist_for_each_entry_rcu(tmp_neigh_node, node, @@ -471,7 +473,12 @@ static char count_real_packets(struct ethhdr *ethhdr, orig_node->last_real_seqno = batman_packet->seqno; } + kref_put(&orig_node->refcount, orig_node_free_ref); return is_duplicate; + +err: + kref_put(&orig_node->refcount, orig_node_free_ref); + return -1; } /* copy primary address for bonding */ @@ -686,7 +693,6 @@ void receive_bat_packet(struct ethhdr *ethhdr, int offset; orig_neigh_node = get_orig_node(bat_priv, ethhdr->h_source); - if (!orig_neigh_node) return; @@ -707,6 +713,7 @@ void receive_bat_packet(struct ethhdr *ethhdr, bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: " "originator packet from myself (via neighbor)\n"); + kref_put(&orig_neigh_node->refcount, orig_node_free_ref); return; } @@ -727,13 +734,13 @@ void receive_bat_packet(struct ethhdr *ethhdr, bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: packet within seqno protection time " "(sender: %pM)\n", ethhdr->h_source); - return; + goto out; } if (batman_packet->tq == 0) { bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: originator packet with tq equal 0\n"); - return; + goto out; } /* avoid temporary routing loops */ @@ -747,7 +754,7 @@ void receive_bat_packet(struct ethhdr *ethhdr, bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: ignoring all rebroadcast packets that " "may make me loop (sender: %pM)\n", ethhdr->h_source); - return; + goto out; } /* if sender is a direct neighbor the sender mac equals @@ -756,14 +763,14 @@ void receive_bat_packet(struct ethhdr *ethhdr, orig_node : get_orig_node(bat_priv, ethhdr->h_source)); if (!orig_neigh_node) - return; + goto out_neigh; /* drop packet if sender is not a direct neighbor and if we * don't route towards it */ if (!is_single_hop_neigh && (!orig_neigh_node->router)) { bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: OGM via unknown neighbor!\n"); - return; + goto out_neigh; } 
is_bidirectional = is_bidirectional_neigh(orig_node, orig_neigh_node, @@ -790,26 +797,32 @@ void receive_bat_packet(struct ethhdr *ethhdr, bat_dbg(DBG_BATMAN, bat_priv, "Forwarding packet: " "rebroadcast neighbor packet with direct link flag\n"); - return; + goto out_neigh; } /* multihop originator */ if (!is_bidirectional) { bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: not received via bidirectional link\n"); - return; + goto out_neigh; } if (is_duplicate) { bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: duplicate packet received\n"); - return; + goto out_neigh; } bat_dbg(DBG_BATMAN, bat_priv, "Forwarding packet: rebroadcast originator packet\n"); schedule_forward_packet(orig_node, ethhdr, batman_packet, 0, hna_buff_len, if_incoming); + +out_neigh: + if (!is_single_hop_neigh) + kref_put(&orig_neigh_node->refcount, orig_node_free_ref); +out: + kref_put(&orig_node->refcount, orig_node_free_ref); } int recv_bat_packet(struct sk_buff *skb, struct batman_if *batman_if) diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index d4fa727..ca4d42d 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -86,6 +86,8 @@ struct orig_node { struct hlist_head neigh_list; struct list_head frag_list; spinlock_t neigh_list_lock; /* protects neighbor list */ + struct kref refcount; + struct bat_priv *bat_priv; unsigned long last_frag_packet; struct { uint8_t candidates; -- cgit v1.1 From 2ae2daf6c3f23364862a7d4f2ca79eab041b701b Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Wed, 19 Jan 2011 20:01:42 +0000 Subject: batman-adv: protect ogm counter arrays with spinlock Signed-off-by: Marek Lindner --- net/batman-adv/originator.c | 11 +++++++++-- net/batman-adv/routing.c | 27 +++++++++++++++++++++++---- net/batman-adv/types.h | 1 + 3 files changed, 33 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index fcdb0b7..71dfc24 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c 
@@ -207,6 +207,7 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr) return NULL; INIT_HLIST_HEAD(&orig_node->neigh_list); + spin_lock_init(&orig_node->ogm_cnt_lock); spin_lock_init(&orig_node->neigh_list_lock); kref_init(&orig_node->refcount); @@ -517,7 +518,7 @@ int orig_hash_add_if(struct batman_if *batman_if, int max_if_num) struct hlist_head *head; struct element_t *bucket; struct orig_node *orig_node; - int i; + int i, ret; /* resize all orig nodes because orig_node->bcast_own(_sum) depend on * if_num */ @@ -530,7 +531,11 @@ int orig_hash_add_if(struct batman_if *batman_if, int max_if_num) hlist_for_each_entry_rcu(bucket, walk, head, hlist) { orig_node = bucket->data; - if (orig_node_add_if(orig_node, max_if_num) == -1) + spin_lock_bh(&orig_node->ogm_cnt_lock); + ret = orig_node_add_if(orig_node, max_if_num); + spin_unlock_bh(&orig_node->ogm_cnt_lock); + + if (ret == -1) goto err; } rcu_read_unlock(); @@ -619,8 +624,10 @@ int orig_hash_del_if(struct batman_if *batman_if, int max_if_num) hlist_for_each_entry_rcu(bucket, walk, head, hlist) { orig_node = bucket->data; + spin_lock_bh(&orig_node->ogm_cnt_lock); ret = orig_node_del_if(orig_node, max_if_num, batman_if->if_num); + spin_unlock_bh(&orig_node->ogm_cnt_lock); if (ret == -1) goto err; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 1c31a0e..7627ebe 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -55,12 +55,14 @@ void slide_own_bcast_window(struct batman_if *batman_if) rcu_read_lock(); hlist_for_each_entry_rcu(bucket, walk, head, hlist) { orig_node = bucket->data; + spin_lock_bh(&orig_node->ogm_cnt_lock); word_index = batman_if->if_num * NUM_WORDS; word = &(orig_node->bcast_own[word_index]); bit_get_packet(bat_priv, word, 1, 0); orig_node->bcast_own_sum[batman_if->if_num] = bit_packet_count(word); + spin_unlock_bh(&orig_node->ogm_cnt_lock); } rcu_read_unlock(); } @@ -278,8 +280,10 @@ static void update_orig(struct bat_priv *bat_priv, 
char is_duplicate) { struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; + struct orig_node *orig_node_tmp; struct hlist_node *node; int tmp_hna_buff_len; + uint8_t bcast_own_sum_orig, bcast_own_sum_neigh; bat_dbg(DBG_BATMAN, bat_priv, "update_originator(): " "Searching and updating originator entry of received packet\n"); @@ -351,10 +355,22 @@ static void update_orig(struct bat_priv *bat_priv, /* if the TQ is the same and the link not more symetric we * won't consider it either */ if ((orig_node->router) && - ((neigh_node->tq_avg == orig_node->router->tq_avg) && - (orig_node->router->orig_node->bcast_own_sum[if_incoming->if_num] - >= neigh_node->orig_node->bcast_own_sum[if_incoming->if_num]))) - goto update_hna; + (neigh_node->tq_avg == orig_node->router->tq_avg)) { + orig_node_tmp = orig_node->router->orig_node; + spin_lock_bh(&orig_node_tmp->ogm_cnt_lock); + bcast_own_sum_orig = + orig_node_tmp->bcast_own_sum[if_incoming->if_num]; + spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); + + orig_node_tmp = neigh_node->orig_node; + spin_lock_bh(&orig_node_tmp->ogm_cnt_lock); + bcast_own_sum_neigh = + orig_node_tmp->bcast_own_sum[if_incoming->if_num]; + spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock); + + if (bcast_own_sum_orig >= bcast_own_sum_neigh) + goto update_hna; + } update_routes(bat_priv, orig_node, neigh_node, hna_buff, tmp_hna_buff_len); @@ -705,10 +721,13 @@ void receive_bat_packet(struct ethhdr *ethhdr, batman_packet->orig) && (batman_packet->seqno - if_incoming_seqno + 2 == 0)) { offset = if_incoming->if_num * NUM_WORDS; + + spin_lock_bh(&orig_neigh_node->ogm_cnt_lock); word = &(orig_neigh_node->bcast_own[offset]); bit_mark(word, 0); orig_neigh_node->bcast_own_sum[if_incoming->if_num] = bit_packet_count(word); + spin_unlock_bh(&orig_neigh_node->ogm_cnt_lock); } bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: " diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index ca4d42d..ff70afc 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h 
@@ -89,6 +89,7 @@ struct orig_node { struct kref refcount; struct bat_priv *bat_priv; unsigned long last_frag_packet; + spinlock_t ogm_cnt_lock; /* protects ogm counter */ struct { uint8_t candidates; struct neigh_node *selected; -- cgit v1.1 From a4c135c561106c397bae33455acfca4aa8065a30 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 19 Jan 2011 20:01:43 +0000 Subject: batman-adv: protect bonding with rcu locks bonding / alternating candidates need to be secured by rcu locks as well. This patch therefore converts the bonding list from a plain pointer list to a rcu securable lists and references the bonding candidates. Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner --- net/batman-adv/hard-interface.c | 2 +- net/batman-adv/originator.c | 25 +++- net/batman-adv/originator.h | 1 + net/batman-adv/routing.c | 313 +++++++++++++++++++++------------------- net/batman-adv/routing.h | 6 +- net/batman-adv/types.h | 9 +- net/batman-adv/unicast.c | 2 +- 7 files changed, 195 insertions(+), 163 deletions(-) (limited to 'net') diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index f2131f4..e2b001a 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -271,7 +271,7 @@ static void hardif_activate_interface(struct batman_if *batman_if) static void hardif_deactivate_interface(struct batman_if *batman_if) { if ((batman_if->if_status != IF_ACTIVE) && - (batman_if->if_status != IF_TO_BE_ACTIVATED)) + (batman_if->if_status != IF_TO_BE_ACTIVATED)) return; batman_if->if_status = IF_INACTIVE; diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 71dfc24..a85eadc 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -75,6 +75,14 @@ static void neigh_node_free_rcu(struct rcu_head *rcu) kref_put(&neigh_node->refcount, neigh_node_free_ref); } +void neigh_node_free_rcu_bond(struct rcu_head *rcu) +{ + struct neigh_node *neigh_node; + + neigh_node = container_of(rcu, 
struct neigh_node, rcu_bond); + kref_put(&neigh_node->refcount, neigh_node_free_ref); +} + struct neigh_node *create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node, uint8_t *neigh, @@ -91,6 +99,7 @@ struct neigh_node *create_neighbor(struct orig_node *orig_node, return NULL; INIT_HLIST_NODE(&neigh_node->list); + INIT_LIST_HEAD(&neigh_node->bonding_list); memcpy(neigh_node->addr, neigh, ETH_ALEN); neigh_node->orig_node = orig_neigh_node; @@ -106,13 +115,20 @@ struct neigh_node *create_neighbor(struct orig_node *orig_node, void orig_node_free_ref(struct kref *refcount) { struct hlist_node *node, *node_tmp; - struct neigh_node *neigh_node; + struct neigh_node *neigh_node, *tmp_neigh_node; struct orig_node *orig_node; orig_node = container_of(refcount, struct orig_node, refcount); spin_lock_bh(&orig_node->neigh_list_lock); + /* for all bonding members ... */ + list_for_each_entry_safe(neigh_node, tmp_neigh_node, + &orig_node->bond_list, bonding_list) { + list_del_rcu(&neigh_node->bonding_list); + call_rcu(&neigh_node->rcu_bond, neigh_node_free_rcu_bond); + } + /* for all neighbors towards this originator ... 
*/ hlist_for_each_entry_safe(neigh_node, node, node_tmp, &orig_node->neigh_list, list) { @@ -207,6 +223,7 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr) return NULL; INIT_HLIST_HEAD(&orig_node->neigh_list); + INIT_LIST_HEAD(&orig_node->bond_list); spin_lock_init(&orig_node->ogm_cnt_lock); spin_lock_init(&orig_node->neigh_list_lock); kref_init(&orig_node->refcount); @@ -220,6 +237,8 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr) orig_node->batman_seqno_reset = jiffies - 1 - msecs_to_jiffies(RESET_PROTECTION_MS); + atomic_set(&orig_node->bond_candidates, 0); + size = bat_priv->num_ifaces * sizeof(unsigned long) * NUM_WORDS; orig_node->bcast_own = kzalloc(size, GFP_ATOMIC); @@ -295,6 +314,7 @@ static bool purge_orig_neighbors(struct bat_priv *bat_priv, neigh_purged = true; hlist_del_rcu(&neigh_node->list); + bonding_candidate_del(orig_node, neigh_node); call_rcu(&neigh_node->rcu, neigh_node_free_rcu); } else { if ((!*best_neigh_node) || @@ -326,9 +346,6 @@ static bool purge_orig_node(struct bat_priv *bat_priv, best_neigh_node, orig_node->hna_buff, orig_node->hna_buff_len); - /* update bonding candidates, we could have lost - * some candidates. 
*/ - update_bonding_candidates(orig_node); } } diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index edc64dc..360dfd1 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -26,6 +26,7 @@ int originator_init(struct bat_priv *bat_priv); void originator_free(struct bat_priv *bat_priv); void purge_orig_ref(struct bat_priv *bat_priv); void orig_node_free_ref(struct kref *refcount); +void neigh_node_free_rcu_bond(struct rcu_head *rcu); struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr); struct neigh_node *create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node, diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 7627ebe..1ad14da 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -271,6 +271,101 @@ out: return ret; } +/* caller must hold the neigh_list_lock */ +void bonding_candidate_del(struct orig_node *orig_node, + struct neigh_node *neigh_node) +{ + /* this neighbor is not part of our candidate list */ + if (list_empty(&neigh_node->bonding_list)) + goto out; + + list_del_rcu(&neigh_node->bonding_list); + call_rcu(&neigh_node->rcu_bond, neigh_node_free_rcu_bond); + INIT_LIST_HEAD(&neigh_node->bonding_list); + atomic_dec(&orig_node->bond_candidates); + +out: + return; +} + +static void bonding_candidate_add(struct orig_node *orig_node, + struct neigh_node *neigh_node) +{ + struct hlist_node *node; + struct neigh_node *tmp_neigh_node; + uint8_t best_tq, interference_candidate = 0; + + spin_lock_bh(&orig_node->neigh_list_lock); + + /* only consider if it has the same primary address ... */ + if (!compare_orig(orig_node->orig, + neigh_node->orig_node->primary_addr)) + goto candidate_del; + + if (!orig_node->router) + goto candidate_del; + + best_tq = orig_node->router->tq_avg; + + /* ... 
and is good enough to be considered */ + if (neigh_node->tq_avg < best_tq - BONDING_TQ_THRESHOLD) + goto candidate_del; + + /** + * check if we have another candidate with the same mac address or + * interface. If we do, we won't select this candidate because of + * possible interference. + */ + hlist_for_each_entry_rcu(tmp_neigh_node, node, + &orig_node->neigh_list, list) { + + if (tmp_neigh_node == neigh_node) + continue; + + /* we only care if the other candidate is even + * considered as candidate. */ + if (list_empty(&tmp_neigh_node->bonding_list)) + continue; + + if ((neigh_node->if_incoming == tmp_neigh_node->if_incoming) || + (compare_orig(neigh_node->addr, tmp_neigh_node->addr))) { + interference_candidate = 1; + break; + } + } + + /* don't care further if it is an interference candidate */ + if (interference_candidate) + goto candidate_del; + + /* this neighbor already is part of our candidate list */ + if (!list_empty(&neigh_node->bonding_list)) + goto out; + + list_add_rcu(&neigh_node->bonding_list, &orig_node->bond_list); + kref_get(&neigh_node->refcount); + atomic_inc(&orig_node->bond_candidates); + goto out; + +candidate_del: + bonding_candidate_del(orig_node, neigh_node); + +out: + spin_unlock_bh(&orig_node->neigh_list_lock); + return; +} + +/* copy primary address for bonding */ +static void bonding_save_primary(struct orig_node *orig_node, + struct orig_node *orig_neigh_node, + struct batman_packet *batman_packet) +{ + if (!(batman_packet->flags & PRIMARIES_FIRST_HOP)) + return; + + memcpy(orig_neigh_node->primary_addr, orig_node->orig, ETH_ALEN); +} + static void update_orig(struct bat_priv *bat_priv, struct orig_node *orig_node, struct ethhdr *ethhdr, @@ -339,6 +434,8 @@ static void update_orig(struct bat_priv *bat_priv, neigh_node->last_ttl = batman_packet->ttl; } + bonding_candidate_add(orig_node, neigh_node); + tmp_hna_buff_len = (hna_buff_len > batman_packet->num_hna * ETH_ALEN ? 
batman_packet->num_hna * ETH_ALEN : hna_buff_len); @@ -497,123 +594,10 @@ err: return -1; } -/* copy primary address for bonding */ -static void mark_bonding_address(struct orig_node *orig_node, - struct orig_node *orig_neigh_node, - struct batman_packet *batman_packet) - -{ - if (batman_packet->flags & PRIMARIES_FIRST_HOP) - memcpy(orig_neigh_node->primary_addr, - orig_node->orig, ETH_ALEN); - - return; -} - -/* mark possible bond.candidates in the neighbor list */ -void update_bonding_candidates(struct orig_node *orig_node) -{ - int candidates; - int interference_candidate; - int best_tq; - struct hlist_node *node, *node2; - struct neigh_node *tmp_neigh_node, *tmp_neigh_node2; - struct neigh_node *first_candidate, *last_candidate; - - /* update the candidates for this originator */ - if (!orig_node->router) { - orig_node->bond.candidates = 0; - return; - } - - best_tq = orig_node->router->tq_avg; - - /* update bond.candidates */ - - candidates = 0; - - /* mark other nodes which also received "PRIMARIES FIRST HOP" packets - * as "bonding partner" */ - - /* first, zero the list */ - rcu_read_lock(); - hlist_for_each_entry_rcu(tmp_neigh_node, node, - &orig_node->neigh_list, list) { - tmp_neigh_node->next_bond_candidate = NULL; - } - rcu_read_unlock(); - - first_candidate = NULL; - last_candidate = NULL; - - rcu_read_lock(); - hlist_for_each_entry_rcu(tmp_neigh_node, node, - &orig_node->neigh_list, list) { - - /* only consider if it has the same primary address ... */ - if (memcmp(orig_node->orig, - tmp_neigh_node->orig_node->primary_addr, - ETH_ALEN) != 0) - continue; - - /* ... and is good enough to be considered */ - if (tmp_neigh_node->tq_avg < best_tq - BONDING_TQ_THRESHOLD) - continue; - - /* check if we have another candidate with the same - * mac address or interface. If we do, we won't - * select this candidate because of possible interference. 
*/ - - interference_candidate = 0; - hlist_for_each_entry_rcu(tmp_neigh_node2, node2, - &orig_node->neigh_list, list) { - - if (tmp_neigh_node2 == tmp_neigh_node) - continue; - - /* we only care if the other candidate is even - * considered as candidate. */ - if (!tmp_neigh_node2->next_bond_candidate) - continue; - - - if ((tmp_neigh_node->if_incoming == - tmp_neigh_node2->if_incoming) - || (memcmp(tmp_neigh_node->addr, - tmp_neigh_node2->addr, ETH_ALEN) == 0)) { - - interference_candidate = 1; - break; - } - } - /* don't care further if it is an interference candidate */ - if (interference_candidate) - continue; - - if (!first_candidate) { - first_candidate = tmp_neigh_node; - tmp_neigh_node->next_bond_candidate = first_candidate; - } else - tmp_neigh_node->next_bond_candidate = last_candidate; - - last_candidate = tmp_neigh_node; - - candidates++; - } - rcu_read_unlock(); - - if (candidates > 0) { - first_candidate->next_bond_candidate = last_candidate; - orig_node->bond.selected = first_candidate; - } - - orig_node->bond.candidates = candidates; -} - void receive_bat_packet(struct ethhdr *ethhdr, - struct batman_packet *batman_packet, - unsigned char *hna_buff, int hna_buff_len, - struct batman_if *if_incoming) + struct batman_packet *batman_packet, + unsigned char *hna_buff, int hna_buff_len, + struct batman_if *if_incoming) { struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batman_if *batman_if; @@ -795,6 +779,8 @@ void receive_bat_packet(struct ethhdr *ethhdr, is_bidirectional = is_bidirectional_neigh(orig_node, orig_neigh_node, batman_packet, if_incoming); + bonding_save_primary(orig_node, orig_neigh_node, batman_packet); + /* update ranking if it is not a duplicate or has the same * seqno and similar ttl as the non-duplicate */ if (is_bidirectional && @@ -804,9 +790,6 @@ void receive_bat_packet(struct ethhdr *ethhdr, update_orig(bat_priv, orig_node, ethhdr, batman_packet, if_incoming, hna_buff, hna_buff_len, is_duplicate); - 
mark_bonding_address(orig_node, orig_neigh_node, batman_packet); - update_bonding_candidates(orig_node); - /* is single hop (direct) neighbor */ if (is_single_hop_neigh) { @@ -1095,14 +1078,15 @@ int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if) } /* find a suitable router for this originator, and use - * bonding if possible. */ + * bonding if possible. increases the found neighbors + * refcount.*/ struct neigh_node *find_router(struct bat_priv *bat_priv, struct orig_node *orig_node, struct batman_if *recv_if) { struct orig_node *primary_orig_node; struct orig_node *router_orig; - struct neigh_node *router, *first_candidate, *best_router; + struct neigh_node *router, *first_candidate, *tmp_neigh_node; static uint8_t zero_mac[ETH_ALEN] = {0, 0, 0, 0, 0, 0}; int bonding_enabled; @@ -1114,18 +1098,25 @@ struct neigh_node *find_router(struct bat_priv *bat_priv, /* without bonding, the first node should * always choose the default router. */ - bonding_enabled = atomic_read(&bat_priv->bonding); - if ((!recv_if) && (!bonding_enabled)) - return orig_node->router; - + rcu_read_lock(); + /* select default router to output */ + router = orig_node->router; router_orig = orig_node->router->orig_node; + if (!router_orig) { + rcu_read_unlock(); + return NULL; + } + + + if ((!recv_if) && (!bonding_enabled)) + goto return_router; /* if we have something in the primary_addr, we can search * for a potential bonding candidate. */ if (memcmp(router_orig->primary_addr, zero_mac, ETH_ALEN) == 0) - return orig_node->router; + goto return_router; /* find the orig_node which has the primary interface. 
might * even be the same as our router_orig in many cases */ @@ -1134,60 +1125,81 @@ struct neigh_node *find_router(struct bat_priv *bat_priv, router_orig->orig, ETH_ALEN) == 0) { primary_orig_node = router_orig; } else { - rcu_read_lock(); primary_orig_node = hash_find(bat_priv->orig_hash, compare_orig, choose_orig, router_orig->primary_addr); - rcu_read_unlock(); - if (!primary_orig_node) - return orig_node->router; + goto return_router; } /* with less than 2 candidates, we can't do any * bonding and prefer the original router. */ - - if (primary_orig_node->bond.candidates < 2) - return orig_node->router; + if (atomic_read(&primary_orig_node->bond_candidates) < 2) + goto return_router; /* all nodes between should choose a candidate which * is is not on the interface where the packet came * in. */ - first_candidate = primary_orig_node->bond.selected; - router = first_candidate; + + first_candidate = NULL; + router = NULL; if (bonding_enabled) { /* in the bonding case, send the packets in a round * robin fashion over the remaining interfaces. */ - do { + + list_for_each_entry_rcu(tmp_neigh_node, + &primary_orig_node->bond_list, bonding_list) { + if (!first_candidate) + first_candidate = tmp_neigh_node; /* recv_if == NULL on the first node. */ - if (router->if_incoming != recv_if) + if (tmp_neigh_node->if_incoming != recv_if) { + router = tmp_neigh_node; break; + } + } - router = router->next_bond_candidate; - } while (router != first_candidate); + /* use the first candidate if nothing was found. 
*/ + if (!router) + router = first_candidate; - primary_orig_node->bond.selected = router->next_bond_candidate; + /* selected should point to the next element + * after the current router */ + spin_lock_bh(&primary_orig_node->neigh_list_lock); + /* this is a list_move(), which unfortunately + * does not exist as rcu version */ + list_del_rcu(&primary_orig_node->bond_list); + list_add_rcu(&primary_orig_node->bond_list, + &router->bonding_list); + spin_unlock_bh(&primary_orig_node->neigh_list_lock); } else { /* if bonding is disabled, use the best of the * remaining candidates which are not using * this interface. */ - best_router = first_candidate; + list_for_each_entry_rcu(tmp_neigh_node, + &primary_orig_node->bond_list, bonding_list) { + if (!first_candidate) + first_candidate = tmp_neigh_node; - do { /* recv_if == NULL on the first node. */ - if ((router->if_incoming != recv_if) && - (router->tq_avg > best_router->tq_avg)) - best_router = router; - - router = router->next_bond_candidate; - } while (router != first_candidate); + if (tmp_neigh_node->if_incoming != recv_if) + /* if we don't have a router yet + * or this one is better, choose it. */ + if ((!router) || + (tmp_neigh_node->tq_avg > router->tq_avg)) { + router = tmp_neigh_node; + } + } - router = best_router; + /* use the first candidate if nothing was found. */ + if (!router) + router = first_candidate; } - +return_router: + kref_get(&router->refcount); + rcu_read_unlock(); return router; } @@ -1247,6 +1259,7 @@ int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, unicast_packet->dest)); rcu_read_unlock(); + /* find_router() increases neigh_nodes refcount if found. 
*/ router = find_router(bat_priv, orig_node, recv_if); if (!router) { diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index a09d16f..e2a9872 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -39,7 +39,9 @@ int recv_bcast_packet(struct sk_buff *skb, struct batman_if *recv_if); int recv_vis_packet(struct sk_buff *skb, struct batman_if *recv_if); int recv_bat_packet(struct sk_buff *skb, struct batman_if *recv_if); struct neigh_node *find_router(struct bat_priv *bat_priv, - struct orig_node *orig_node, struct batman_if *recv_if); -void update_bonding_candidates(struct orig_node *orig_node); + struct orig_node *orig_node, + struct batman_if *recv_if); +void bonding_candidate_del(struct orig_node *orig_node, + struct neigh_node *neigh_node); #endif /* _NET_BATMAN_ADV_ROUTING_H_ */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index ff70afc..1f833f0 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -90,10 +90,8 @@ struct orig_node { struct bat_priv *bat_priv; unsigned long last_frag_packet; spinlock_t ogm_cnt_lock; /* protects ogm counter */ - struct { - uint8_t candidates; - struct neigh_node *selected; - } bond; + atomic_t bond_candidates; + struct list_head bond_list; }; struct gw_node { @@ -116,11 +114,12 @@ struct neigh_node { uint8_t tq_index; uint8_t tq_avg; uint8_t last_ttl; - struct neigh_node *next_bond_candidate; + struct list_head bonding_list; unsigned long last_valid; unsigned long real_bits[NUM_WORDS]; struct kref refcount; struct rcu_head rcu; + struct rcu_head rcu_bond; struct orig_node *orig_node; struct batman_if *if_incoming; }; diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 4687027..00bfeaf 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -299,6 +299,7 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv) if (!orig_node) orig_node = transtable_search(bat_priv, ethhdr->h_dest); + /* find_router() increases neigh_nodes 
refcount if found. */ router = find_router(bat_priv, orig_node, NULL); if (!router) @@ -306,7 +307,6 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv) /* don't lock while sending the packets ... we therefore * copy the required data before sending */ - batman_if = router->if_incoming; memcpy(dstaddr, router->addr, ETH_ALEN); -- cgit v1.1 From 44524fcdf6ca19b58c24f7622c4af1d8d8fe59f8 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Thu, 10 Feb 2011 14:33:53 +0000 Subject: batman-adv: Correct rcu refcounting for neigh_node It might be possible that 2 threads access the same data in the same rcu grace period. The first thread calls call_rcu() to decrement the refcount and free the data while the second thread increases the refcount to use the data. To avoid this race condition all refcount operations have to be atomic. Reported-by: Sven Eckelmann Signed-off-by: Marek Lindner --- net/batman-adv/icmp_socket.c | 27 +++- net/batman-adv/originator.c | 26 +--- net/batman-adv/originator.h | 3 +- net/batman-adv/routing.c | 338 ++++++++++++++++++++++++++++--------------- net/batman-adv/types.h | 3 +- net/batman-adv/unicast.c | 57 +++++--- net/batman-adv/vis.c | 33 ++++- 7 files changed, 313 insertions(+), 174 deletions(-) (limited to 'net') diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 8e0cd8a..7fa5bb8 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -156,7 +156,8 @@ static ssize_t bat_socket_write(struct file *file, const char __user *buff, struct sk_buff *skb; struct icmp_packet_rr *icmp_packet; - struct orig_node *orig_node; + struct orig_node *orig_node = NULL; + struct neigh_node *neigh_node = NULL; struct batman_if *batman_if; size_t packet_len = sizeof(struct icmp_packet); uint8_t dstaddr[ETH_ALEN]; @@ -224,17 +225,25 @@ static ssize_t bat_socket_write(struct file *file, const char __user *buff, orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, compare_orig, 
choose_orig, icmp_packet->dst)); - rcu_read_unlock(); if (!orig_node) goto unlock; - if (!orig_node->router) + kref_get(&orig_node->refcount); + neigh_node = orig_node->router; + + if (!neigh_node) + goto unlock; + + if (!atomic_inc_not_zero(&neigh_node->refcount)) { + neigh_node = NULL; goto unlock; + } + + rcu_read_unlock(); batman_if = orig_node->router->if_incoming; memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); - spin_unlock_bh(&bat_priv->orig_hash_lock); if (!batman_if) @@ -247,14 +256,14 @@ static ssize_t bat_socket_write(struct file *file, const char __user *buff, bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); if (packet_len == sizeof(struct icmp_packet_rr)) - memcpy(icmp_packet->rr, batman_if->net_dev->dev_addr, ETH_ALEN); - + memcpy(icmp_packet->rr, + batman_if->net_dev->dev_addr, ETH_ALEN); send_skb_packet(skb, batman_if, dstaddr); - goto out; unlock: + rcu_read_unlock(); spin_unlock_bh(&bat_priv->orig_hash_lock); dst_unreach: icmp_packet->msg_type = DESTINATION_UNREACHABLE; @@ -262,6 +271,10 @@ dst_unreach: free_skb: kfree_skb(skb); out: + if (neigh_node) + neigh_node_free_ref(neigh_node); + if (orig_node) + kref_put(&orig_node->refcount, orig_node_free_ref); return len; } diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index a85eadc..61299da 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -59,28 +59,18 @@ err: return 0; } -void neigh_node_free_ref(struct kref *refcount) -{ - struct neigh_node *neigh_node; - - neigh_node = container_of(refcount, struct neigh_node, refcount); - kfree(neigh_node); -} - static void neigh_node_free_rcu(struct rcu_head *rcu) { struct neigh_node *neigh_node; neigh_node = container_of(rcu, struct neigh_node, rcu); - kref_put(&neigh_node->refcount, neigh_node_free_ref); + kfree(neigh_node); } -void neigh_node_free_rcu_bond(struct rcu_head *rcu) +void neigh_node_free_ref(struct neigh_node *neigh_node) { - struct neigh_node *neigh_node; - - neigh_node = 
container_of(rcu, struct neigh_node, rcu_bond); - kref_put(&neigh_node->refcount, neigh_node_free_ref); + if (atomic_dec_and_test(&neigh_node->refcount)) + call_rcu(&neigh_node->rcu, neigh_node_free_rcu); } struct neigh_node *create_neighbor(struct orig_node *orig_node, @@ -104,7 +94,7 @@ struct neigh_node *create_neighbor(struct orig_node *orig_node, memcpy(neigh_node->addr, neigh, ETH_ALEN); neigh_node->orig_node = orig_neigh_node; neigh_node->if_incoming = if_incoming; - kref_init(&neigh_node->refcount); + atomic_set(&neigh_node->refcount, 1); spin_lock_bh(&orig_node->neigh_list_lock); hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); @@ -126,14 +116,14 @@ void orig_node_free_ref(struct kref *refcount) list_for_each_entry_safe(neigh_node, tmp_neigh_node, &orig_node->bond_list, bonding_list) { list_del_rcu(&neigh_node->bonding_list); - call_rcu(&neigh_node->rcu_bond, neigh_node_free_rcu_bond); + neigh_node_free_ref(neigh_node); } /* for all neighbors towards this originator ... 
*/ hlist_for_each_entry_safe(neigh_node, node, node_tmp, &orig_node->neigh_list, list) { hlist_del_rcu(&neigh_node->list); - call_rcu(&neigh_node->rcu, neigh_node_free_rcu); + neigh_node_free_ref(neigh_node); } spin_unlock_bh(&orig_node->neigh_list_lock); @@ -315,7 +305,7 @@ static bool purge_orig_neighbors(struct bat_priv *bat_priv, hlist_del_rcu(&neigh_node->list); bonding_candidate_del(orig_node, neigh_node); - call_rcu(&neigh_node->rcu, neigh_node_free_rcu); + neigh_node_free_ref(neigh_node); } else { if ((!*best_neigh_node) || (neigh_node->tq_avg > (*best_neigh_node)->tq_avg)) diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 360dfd1..84d96e2 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -26,13 +26,12 @@ int originator_init(struct bat_priv *bat_priv); void originator_free(struct bat_priv *bat_priv); void purge_orig_ref(struct bat_priv *bat_priv); void orig_node_free_ref(struct kref *refcount); -void neigh_node_free_rcu_bond(struct rcu_head *rcu); struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr); struct neigh_node *create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node, uint8_t *neigh, struct batman_if *if_incoming); -void neigh_node_free_ref(struct kref *refcount); +void neigh_node_free_ref(struct neigh_node *neigh_node); int orig_seq_print_text(struct seq_file *seq, void *offset); int orig_hash_add_if(struct batman_if *batman_if, int max_if_num); int orig_hash_del_if(struct batman_if *batman_if, int max_if_num); diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 1ad14da..9185666 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -121,12 +121,12 @@ static void update_route(struct bat_priv *bat_priv, orig_node->router->addr); } - if (neigh_node) - kref_get(&neigh_node->refcount); + if (neigh_node && !atomic_inc_not_zero(&neigh_node->refcount)) + neigh_node = NULL; neigh_node_tmp = orig_node->router; 
orig_node->router = neigh_node; if (neigh_node_tmp) - kref_put(&neigh_node_tmp->refcount, neigh_node_free_ref); + neigh_node_free_ref(neigh_node_tmp); } @@ -177,7 +177,11 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, if (!neigh_node) goto unlock; - kref_get(&neigh_node->refcount); + if (!atomic_inc_not_zero(&neigh_node->refcount)) { + neigh_node = NULL; + goto unlock; + } + rcu_read_unlock(); neigh_node->last_valid = jiffies; @@ -202,7 +206,11 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, if (!neigh_node) goto unlock; - kref_get(&neigh_node->refcount); + if (!atomic_inc_not_zero(&neigh_node->refcount)) { + neigh_node = NULL; + goto unlock; + } + rcu_read_unlock(); } @@ -267,7 +275,7 @@ unlock: rcu_read_unlock(); out: if (neigh_node) - kref_put(&neigh_node->refcount, neigh_node_free_ref); + neigh_node_free_ref(neigh_node); return ret; } @@ -280,8 +288,8 @@ void bonding_candidate_del(struct orig_node *orig_node, goto out; list_del_rcu(&neigh_node->bonding_list); - call_rcu(&neigh_node->rcu_bond, neigh_node_free_rcu_bond); INIT_LIST_HEAD(&neigh_node->bonding_list); + neigh_node_free_ref(neigh_node); atomic_dec(&orig_node->bond_candidates); out: @@ -342,8 +350,10 @@ static void bonding_candidate_add(struct orig_node *orig_node, if (!list_empty(&neigh_node->bonding_list)) goto out; + if (!atomic_inc_not_zero(&neigh_node->refcount)) + goto out; + list_add_rcu(&neigh_node->bonding_list, &orig_node->bond_list); - kref_get(&neigh_node->refcount); atomic_inc(&orig_node->bond_candidates); goto out; @@ -387,7 +397,10 @@ static void update_orig(struct bat_priv *bat_priv, hlist_for_each_entry_rcu(tmp_neigh_node, node, &orig_node->neigh_list, list) { if (compare_orig(tmp_neigh_node->addr, ethhdr->h_source) && - (tmp_neigh_node->if_incoming == if_incoming)) { + (tmp_neigh_node->if_incoming == if_incoming) && + atomic_inc_not_zero(&tmp_neigh_node->refcount)) { + if (neigh_node) + neigh_node_free_ref(neigh_node); neigh_node = tmp_neigh_node; 
continue; } @@ -414,11 +427,15 @@ static void update_orig(struct bat_priv *bat_priv, kref_put(&orig_tmp->refcount, orig_node_free_ref); if (!neigh_node) goto unlock; + + if (!atomic_inc_not_zero(&neigh_node->refcount)) { + neigh_node = NULL; + goto unlock; + } } else bat_dbg(DBG_BATMAN, bat_priv, "Updating existing last-hop neighbor of originator\n"); - kref_get(&neigh_node->refcount); rcu_read_unlock(); orig_node->flags = batman_packet->flags; @@ -495,7 +512,7 @@ unlock: rcu_read_unlock(); out: if (neigh_node) - kref_put(&neigh_node->refcount, neigh_node_free_ref); + neigh_node_free_ref(neigh_node); } /* checks whether the host restarted and is in the protection time. @@ -870,22 +887,23 @@ int recv_bat_packet(struct sk_buff *skb, struct batman_if *batman_if) static int recv_my_icmp_packet(struct bat_priv *bat_priv, struct sk_buff *skb, size_t icmp_len) { - struct orig_node *orig_node; + struct orig_node *orig_node = NULL; + struct neigh_node *neigh_node = NULL; struct icmp_packet_rr *icmp_packet; struct batman_if *batman_if; - int ret; uint8_t dstaddr[ETH_ALEN]; + int ret = NET_RX_DROP; icmp_packet = (struct icmp_packet_rr *)skb->data; /* add data to device queue */ if (icmp_packet->msg_type != ECHO_REQUEST) { bat_socket_receive_packet(icmp_packet, icmp_len); - return NET_RX_DROP; + goto out; } if (!bat_priv->primary_if) - return NET_RX_DROP; + goto out; /* answer echo request (ping) */ /* get routing information */ @@ -894,46 +912,65 @@ static int recv_my_icmp_packet(struct bat_priv *bat_priv, orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, compare_orig, choose_orig, icmp_packet->orig)); - rcu_read_unlock(); - ret = NET_RX_DROP; - if ((orig_node) && (orig_node->router)) { + if (!orig_node) + goto unlock; - /* don't lock while sending the packets ... 
we therefore - * copy the required data before sending */ - batman_if = orig_node->router->if_incoming; - memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); - spin_unlock_bh(&bat_priv->orig_hash_lock); + kref_get(&orig_node->refcount); + neigh_node = orig_node->router; - /* create a copy of the skb, if needed, to modify it. */ - if (skb_cow(skb, sizeof(struct ethhdr)) < 0) - return NET_RX_DROP; + if (!neigh_node) + goto unlock; - icmp_packet = (struct icmp_packet_rr *)skb->data; + if (!atomic_inc_not_zero(&neigh_node->refcount)) { + neigh_node = NULL; + goto unlock; + } - memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); - memcpy(icmp_packet->orig, - bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); - icmp_packet->msg_type = ECHO_REPLY; - icmp_packet->ttl = TTL; + rcu_read_unlock(); - send_skb_packet(skb, batman_if, dstaddr); - ret = NET_RX_SUCCESS; + /* don't lock while sending the packets ... we therefore + * copy the required data before sending */ + batman_if = orig_node->router->if_incoming; + memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); + spin_unlock_bh(&bat_priv->orig_hash_lock); - } else - spin_unlock_bh(&bat_priv->orig_hash_lock); + /* create a copy of the skb, if needed, to modify it. 
*/ + if (skb_cow(skb, sizeof(struct ethhdr)) < 0) + goto out; + icmp_packet = (struct icmp_packet_rr *)skb->data; + + memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); + memcpy(icmp_packet->orig, + bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); + icmp_packet->msg_type = ECHO_REPLY; + icmp_packet->ttl = TTL; + + send_skb_packet(skb, batman_if, dstaddr); + ret = NET_RX_SUCCESS; + goto out; + +unlock: + rcu_read_unlock(); + spin_unlock_bh(&bat_priv->orig_hash_lock); +out: + if (neigh_node) + neigh_node_free_ref(neigh_node); + if (orig_node) + kref_put(&orig_node->refcount, orig_node_free_ref); return ret; } static int recv_icmp_ttl_exceeded(struct bat_priv *bat_priv, struct sk_buff *skb) { - struct orig_node *orig_node; + struct orig_node *orig_node = NULL; + struct neigh_node *neigh_node = NULL; struct icmp_packet *icmp_packet; struct batman_if *batman_if; - int ret; uint8_t dstaddr[ETH_ALEN]; + int ret = NET_RX_DROP; icmp_packet = (struct icmp_packet *)skb->data; @@ -942,11 +979,11 @@ static int recv_icmp_ttl_exceeded(struct bat_priv *bat_priv, pr_debug("Warning - can't forward icmp packet from %pM to " "%pM: ttl exceeded\n", icmp_packet->orig, icmp_packet->dst); - return NET_RX_DROP; + goto out; } if (!bat_priv->primary_if) - return NET_RX_DROP; + goto out; /* get routing information */ spin_lock_bh(&bat_priv->orig_hash_lock); @@ -954,35 +991,53 @@ static int recv_icmp_ttl_exceeded(struct bat_priv *bat_priv, orig_node = ((struct orig_node *) hash_find(bat_priv->orig_hash, compare_orig, choose_orig, icmp_packet->orig)); - rcu_read_unlock(); - ret = NET_RX_DROP; - if ((orig_node) && (orig_node->router)) { + if (!orig_node) + goto unlock; - /* don't lock while sending the packets ... 
we therefore - * copy the required data before sending */ - batman_if = orig_node->router->if_incoming; - memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); - spin_unlock_bh(&bat_priv->orig_hash_lock); + kref_get(&orig_node->refcount); + neigh_node = orig_node->router; - /* create a copy of the skb, if needed, to modify it. */ - if (skb_cow(skb, sizeof(struct ethhdr)) < 0) - return NET_RX_DROP; + if (!neigh_node) + goto unlock; + + if (!atomic_inc_not_zero(&neigh_node->refcount)) { + neigh_node = NULL; + goto unlock; + } - icmp_packet = (struct icmp_packet *) skb->data; + rcu_read_unlock(); - memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); - memcpy(icmp_packet->orig, - bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); - icmp_packet->msg_type = TTL_EXCEEDED; - icmp_packet->ttl = TTL; + /* don't lock while sending the packets ... we therefore + * copy the required data before sending */ + batman_if = orig_node->router->if_incoming; + memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); + spin_unlock_bh(&bat_priv->orig_hash_lock); - send_skb_packet(skb, batman_if, dstaddr); - ret = NET_RX_SUCCESS; + /* create a copy of the skb, if needed, to modify it. 
*/ + if (skb_cow(skb, sizeof(struct ethhdr)) < 0) + goto out; - } else - spin_unlock_bh(&bat_priv->orig_hash_lock); + icmp_packet = (struct icmp_packet *)skb->data; + + memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); + memcpy(icmp_packet->orig, + bat_priv->primary_if->net_dev->dev_addr, ETH_ALEN); + icmp_packet->msg_type = TTL_EXCEEDED; + icmp_packet->ttl = TTL; + + send_skb_packet(skb, batman_if, dstaddr); + ret = NET_RX_SUCCESS; + goto out; +unlock: + rcu_read_unlock(); + spin_unlock_bh(&bat_priv->orig_hash_lock); +out: + if (neigh_node) + neigh_node_free_ref(neigh_node); + if (orig_node) + kref_put(&orig_node->refcount, orig_node_free_ref); return ret; } @@ -992,11 +1047,12 @@ int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if) struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); struct icmp_packet_rr *icmp_packet; struct ethhdr *ethhdr; - struct orig_node *orig_node; + struct orig_node *orig_node = NULL; + struct neigh_node *neigh_node = NULL; struct batman_if *batman_if; int hdr_size = sizeof(struct icmp_packet); - int ret; uint8_t dstaddr[ETH_ALEN]; + int ret = NET_RX_DROP; /** * we truncate all incoming icmp packets if they don't match our size @@ -1006,21 +1062,21 @@ int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if) /* drop packet if it has not necessary minimum size */ if (unlikely(!pskb_may_pull(skb, hdr_size))) - return NET_RX_DROP; + goto out; ethhdr = (struct ethhdr *)skb_mac_header(skb); /* packet with unicast indication but broadcast recipient */ if (is_broadcast_ether_addr(ethhdr->h_dest)) - return NET_RX_DROP; + goto out; /* packet with broadcast sender address */ if (is_broadcast_ether_addr(ethhdr->h_source)) - return NET_RX_DROP; + goto out; /* not for me */ if (!is_my_mac(ethhdr->h_dest)) - return NET_RX_DROP; + goto out; icmp_packet = (struct icmp_packet_rr *)skb->data; @@ -1040,40 +1096,56 @@ int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if) if (icmp_packet->ttl < 2) return 
recv_icmp_ttl_exceeded(bat_priv, skb); - ret = NET_RX_DROP; - /* get routing information */ spin_lock_bh(&bat_priv->orig_hash_lock); rcu_read_lock(); orig_node = ((struct orig_node *) hash_find(bat_priv->orig_hash, compare_orig, choose_orig, icmp_packet->dst)); - rcu_read_unlock(); + if (!orig_node) + goto unlock; - if ((orig_node) && (orig_node->router)) { + kref_get(&orig_node->refcount); + neigh_node = orig_node->router; - /* don't lock while sending the packets ... we therefore - * copy the required data before sending */ - batman_if = orig_node->router->if_incoming; - memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); - spin_unlock_bh(&bat_priv->orig_hash_lock); + if (!neigh_node) + goto unlock; - /* create a copy of the skb, if needed, to modify it. */ - if (skb_cow(skb, sizeof(struct ethhdr)) < 0) - return NET_RX_DROP; + if (!atomic_inc_not_zero(&neigh_node->refcount)) { + neigh_node = NULL; + goto unlock; + } + + rcu_read_unlock(); - icmp_packet = (struct icmp_packet_rr *)skb->data; + /* don't lock while sending the packets ... we therefore + * copy the required data before sending */ + batman_if = orig_node->router->if_incoming; + memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); + spin_unlock_bh(&bat_priv->orig_hash_lock); - /* decrement ttl */ - icmp_packet->ttl--; + /* create a copy of the skb, if needed, to modify it. 
*/ + if (skb_cow(skb, sizeof(struct ethhdr)) < 0) + goto out; - /* route it */ - send_skb_packet(skb, batman_if, dstaddr); - ret = NET_RX_SUCCESS; + icmp_packet = (struct icmp_packet_rr *)skb->data; - } else - spin_unlock_bh(&bat_priv->orig_hash_lock); + /* decrement ttl */ + icmp_packet->ttl--; + /* route it */ + send_skb_packet(skb, batman_if, dstaddr); + ret = NET_RX_SUCCESS; + goto out; + +unlock: + rcu_read_unlock(); + spin_unlock_bh(&bat_priv->orig_hash_lock); +out: + if (neigh_node) + neigh_node_free_ref(neigh_node); + if (orig_node) + kref_put(&orig_node->refcount, orig_node_free_ref); return ret; } @@ -1104,12 +1176,11 @@ struct neigh_node *find_router(struct bat_priv *bat_priv, /* select default router to output */ router = orig_node->router; router_orig = orig_node->router->orig_node; - if (!router_orig) { + if (!router_orig || !atomic_inc_not_zero(&router->refcount)) { rcu_read_unlock(); return NULL; } - if ((!recv_if) && (!bonding_enabled)) goto return_router; @@ -1142,6 +1213,7 @@ struct neigh_node *find_router(struct bat_priv *bat_priv, * is is not on the interface where the packet came * in. */ + neigh_node_free_ref(router); first_candidate = NULL; router = NULL; @@ -1154,16 +1226,23 @@ struct neigh_node *find_router(struct bat_priv *bat_priv, if (!first_candidate) first_candidate = tmp_neigh_node; /* recv_if == NULL on the first node. */ - if (tmp_neigh_node->if_incoming != recv_if) { + if (tmp_neigh_node->if_incoming != recv_if && + atomic_inc_not_zero(&tmp_neigh_node->refcount)) { router = tmp_neigh_node; break; } } /* use the first candidate if nothing was found. 
*/ - if (!router) + if (!router && first_candidate && + atomic_inc_not_zero(&first_candidate->refcount)) router = first_candidate; + if (!router) { + rcu_read_unlock(); + return NULL; + } + /* selected should point to the next element * after the current router */ spin_lock_bh(&primary_orig_node->neigh_list_lock); @@ -1184,21 +1263,34 @@ struct neigh_node *find_router(struct bat_priv *bat_priv, first_candidate = tmp_neigh_node; /* recv_if == NULL on the first node. */ - if (tmp_neigh_node->if_incoming != recv_if) - /* if we don't have a router yet - * or this one is better, choose it. */ - if ((!router) || - (tmp_neigh_node->tq_avg > router->tq_avg)) { - router = tmp_neigh_node; - } + if (tmp_neigh_node->if_incoming == recv_if) + continue; + + if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) + continue; + + /* if we don't have a router yet + * or this one is better, choose it. */ + if ((!router) || + (tmp_neigh_node->tq_avg > router->tq_avg)) { + /* decrement refcount of + * previously selected router */ + if (router) + neigh_node_free_ref(router); + + router = tmp_neigh_node; + atomic_inc_not_zero(&router->refcount); + } + + neigh_node_free_ref(tmp_neigh_node); } /* use the first candidate if nothing was found. 
*/ - if (!router) + if (!router && first_candidate && + atomic_inc_not_zero(&first_candidate->refcount)) router = first_candidate; } return_router: - kref_get(&router->refcount); rcu_read_unlock(); return router; } @@ -1232,13 +1324,13 @@ int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, int hdr_size) { struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); - struct orig_node *orig_node; - struct neigh_node *router; + struct orig_node *orig_node = NULL; + struct neigh_node *neigh_node = NULL; struct batman_if *batman_if; uint8_t dstaddr[ETH_ALEN]; struct unicast_packet *unicast_packet; struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); - int ret; + int ret = NET_RX_DROP; struct sk_buff *new_skb; unicast_packet = (struct unicast_packet *)skb->data; @@ -1248,7 +1340,7 @@ int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, pr_debug("Warning - can't forward unicast packet from %pM to " "%pM: ttl exceeded\n", ethhdr->h_source, unicast_packet->dest); - return NET_RX_DROP; + goto out; } /* get routing information */ @@ -1257,27 +1349,29 @@ int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, orig_node = ((struct orig_node *) hash_find(bat_priv->orig_hash, compare_orig, choose_orig, unicast_packet->dest)); + if (!orig_node) + goto unlock; + + kref_get(&orig_node->refcount); rcu_read_unlock(); /* find_router() increases neigh_nodes refcount if found. */ - router = find_router(bat_priv, orig_node, recv_if); + neigh_node = find_router(bat_priv, orig_node, recv_if); - if (!router) { + if (!neigh_node) { spin_unlock_bh(&bat_priv->orig_hash_lock); - return NET_RX_DROP; + goto out; } /* don't lock while sending the packets ... 
we therefore * copy the required data before sending */ - - batman_if = router->if_incoming; - memcpy(dstaddr, router->addr, ETH_ALEN); - + batman_if = neigh_node->if_incoming; + memcpy(dstaddr, neigh_node->addr, ETH_ALEN); spin_unlock_bh(&bat_priv->orig_hash_lock); /* create a copy of the skb, if needed, to modify it. */ if (skb_cow(skb, sizeof(struct ethhdr)) < 0) - return NET_RX_DROP; + goto out; unicast_packet = (struct unicast_packet *)skb->data; @@ -1293,11 +1387,13 @@ int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, ret = frag_reassemble_skb(skb, bat_priv, &new_skb); if (ret == NET_RX_DROP) - return NET_RX_DROP; + goto out; /* packet was buffered for late merge */ - if (!new_skb) - return NET_RX_SUCCESS; + if (!new_skb) { + ret = NET_RX_SUCCESS; + goto out; + } skb = new_skb; unicast_packet = (struct unicast_packet *)skb->data; @@ -1308,8 +1404,18 @@ int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, /* route it */ send_skb_packet(skb, batman_if, dstaddr); + ret = NET_RX_SUCCESS; + goto out; - return NET_RX_SUCCESS; +unlock: + rcu_read_unlock(); + spin_unlock_bh(&bat_priv->orig_hash_lock); +out: + if (neigh_node) + neigh_node_free_ref(neigh_node); + if (orig_node) + kref_put(&orig_node->refcount, orig_node_free_ref); + return ret; } int recv_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if) diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 1f833f0..084604a 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -117,9 +117,8 @@ struct neigh_node { struct list_head bonding_list; unsigned long last_valid; unsigned long real_bits[NUM_WORDS]; - struct kref refcount; + atomic_t refcount; struct rcu_head rcu; - struct rcu_head rcu_bond; struct orig_node *orig_node; struct batman_if *if_incoming; }; diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 00bfeaf..7ca994c 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -285,38 +285,42 @@ int 
unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv) struct unicast_packet *unicast_packet; struct orig_node *orig_node = NULL; struct batman_if *batman_if; - struct neigh_node *router; + struct neigh_node *neigh_node; int data_len = skb->len; uint8_t dstaddr[6]; + int ret = 1; spin_lock_bh(&bat_priv->orig_hash_lock); /* get routing information */ if (is_multicast_ether_addr(ethhdr->h_dest)) orig_node = (struct orig_node *)gw_get_selected(bat_priv); + if (orig_node) { + kref_get(&orig_node->refcount); + goto find_router; + } - /* check for hna host */ - if (!orig_node) - orig_node = transtable_search(bat_priv, ethhdr->h_dest); + /* check for hna host - increases orig_node refcount */ + orig_node = transtable_search(bat_priv, ethhdr->h_dest); +find_router: /* find_router() increases neigh_nodes refcount if found. */ - router = find_router(bat_priv, orig_node, NULL); + neigh_node = find_router(bat_priv, orig_node, NULL); - if (!router) + if (!neigh_node) goto unlock; - /* don't lock while sending the packets ... we therefore - * copy the required data before sending */ - batman_if = router->if_incoming; - memcpy(dstaddr, router->addr, ETH_ALEN); - - spin_unlock_bh(&bat_priv->orig_hash_lock); - - if (batman_if->if_status != IF_ACTIVE) - goto dropped; + if (neigh_node->if_incoming->if_status != IF_ACTIVE) + goto unlock; if (my_skb_head_push(skb, sizeof(struct unicast_packet)) < 0) - goto dropped; + goto unlock; + + /* don't lock while sending the packets ... 
we therefore + * copy the required data before sending */ + batman_if = neigh_node->if_incoming; + memcpy(dstaddr, neigh_node->addr, ETH_ALEN); + spin_unlock_bh(&bat_priv->orig_hash_lock); unicast_packet = (struct unicast_packet *)skb->data; @@ -330,18 +334,25 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv) if (atomic_read(&bat_priv->fragmentation) && data_len + sizeof(struct unicast_packet) > - batman_if->net_dev->mtu) { + batman_if->net_dev->mtu) { /* send frag skb decreases ttl */ unicast_packet->ttl++; - return frag_send_skb(skb, bat_priv, batman_if, - dstaddr); + ret = frag_send_skb(skb, bat_priv, batman_if, dstaddr); + goto out; } + send_skb_packet(skb, batman_if, dstaddr); - return 0; + ret = 0; + goto out; unlock: spin_unlock_bh(&bat_priv->orig_hash_lock); -dropped: - kfree_skb(skb); - return 1; +out: + if (neigh_node) + neigh_node_free_ref(neigh_node); + if (orig_node) + kref_put(&orig_node->refcount, orig_node_free_ref); + if (ret == 1) + kfree_skb(skb); + return ret; } diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 8092ead..9832d8f 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -764,21 +764,35 @@ static void unicast_vis_packet(struct bat_priv *bat_priv, struct vis_info *info) { struct orig_node *orig_node; + struct neigh_node *neigh_node = NULL; struct sk_buff *skb; struct vis_packet *packet; struct batman_if *batman_if; uint8_t dstaddr[ETH_ALEN]; - spin_lock_bh(&bat_priv->orig_hash_lock); packet = (struct vis_packet *)info->skb_packet->data; + + spin_lock_bh(&bat_priv->orig_hash_lock); rcu_read_lock(); orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, compare_orig, choose_orig, packet->target_orig)); - rcu_read_unlock(); - if ((!orig_node) || (!orig_node->router)) - goto out; + if (!orig_node) + goto unlock; + + kref_get(&orig_node->refcount); + neigh_node = orig_node->router; + + if (!neigh_node) + goto unlock; + + if (!atomic_inc_not_zero(&neigh_node->refcount)) { + neigh_node = 
NULL; + goto unlock; + } + + rcu_read_unlock(); /* don't lock while sending the packets ... we therefore * copy the required data before sending */ @@ -790,10 +804,17 @@ static void unicast_vis_packet(struct bat_priv *bat_priv, if (skb) send_skb_packet(skb, batman_if, dstaddr); - return; + goto out; -out: +unlock: + rcu_read_unlock(); spin_unlock_bh(&bat_priv->orig_hash_lock); +out: + if (neigh_node) + neigh_node_free_ref(neigh_node); + if (orig_node) + kref_put(&orig_node->refcount, orig_node_free_ref); + return; } /* only send one vis packet. called from send_vis_packets() */ -- cgit v1.1 From 25b6d3c17eaa92ae9700eb8235bc79782613354a Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Thu, 10 Feb 2011 14:33:49 +0000 Subject: batman-adv: Correct rcu refcounting for gw_node It might be possible that 2 threads access the same data in the same rcu grace period. The first thread calls call_rcu() to decrement the refcount and free the data while the second thread increases the refcount to use the data. To avoid this race condition all refcount operations have to be atomic. 
Reported-by: Sven Eckelmann Signed-off-by: Marek Lindner --- net/batman-adv/gateway_client.c | 37 ++++++++++++++++--------------------- net/batman-adv/types.h | 2 +- 2 files changed, 17 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 429a013..517e001 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -28,20 +28,18 @@ #include #include -static void gw_node_free_ref(struct kref *refcount) +static void gw_node_free_rcu(struct rcu_head *rcu) { struct gw_node *gw_node; - gw_node = container_of(refcount, struct gw_node, refcount); + gw_node = container_of(rcu, struct gw_node, rcu); kfree(gw_node); } -static void gw_node_free_rcu(struct rcu_head *rcu) +static void gw_node_free_ref(struct gw_node *gw_node) { - struct gw_node *gw_node; - - gw_node = container_of(rcu, struct gw_node, rcu); - kref_put(&gw_node->refcount, gw_node_free_ref); + if (atomic_dec_and_test(&gw_node->refcount)) + call_rcu(&gw_node->rcu, gw_node_free_rcu); } void *gw_get_selected(struct bat_priv *bat_priv) @@ -61,25 +59,26 @@ void gw_deselect(struct bat_priv *bat_priv) bat_priv->curr_gw = NULL; if (gw_node) - kref_put(&gw_node->refcount, gw_node_free_ref); + gw_node_free_ref(gw_node); } -static struct gw_node *gw_select(struct bat_priv *bat_priv, - struct gw_node *new_gw_node) +static void gw_select(struct bat_priv *bat_priv, struct gw_node *new_gw_node) { struct gw_node *curr_gw_node = bat_priv->curr_gw; - if (new_gw_node) - kref_get(&new_gw_node->refcount); + if (new_gw_node && !atomic_inc_not_zero(&new_gw_node->refcount)) + new_gw_node = NULL; bat_priv->curr_gw = new_gw_node; - return curr_gw_node; + + if (curr_gw_node) + gw_node_free_ref(curr_gw_node); } void gw_election(struct bat_priv *bat_priv) { struct hlist_node *node; - struct gw_node *gw_node, *curr_gw_tmp = NULL, *old_gw_node = NULL; + struct gw_node *gw_node, *curr_gw_tmp = NULL; uint8_t max_tq = 0; uint32_t 
max_gw_factor = 0, tmp_gw_factor = 0; int down, up; @@ -174,14 +173,10 @@ void gw_election(struct bat_priv *bat_priv) curr_gw_tmp->orig_node->gw_flags, curr_gw_tmp->orig_node->router->tq_avg); - old_gw_node = gw_select(bat_priv, curr_gw_tmp); + gw_select(bat_priv, curr_gw_tmp); } rcu_read_unlock(); - - /* the kfree() has to be outside of the rcu lock */ - if (old_gw_node) - kref_put(&old_gw_node->refcount, gw_node_free_ref); } void gw_check_election(struct bat_priv *bat_priv, struct orig_node *orig_node) @@ -242,7 +237,7 @@ static void gw_node_add(struct bat_priv *bat_priv, memset(gw_node, 0, sizeof(struct gw_node)); INIT_HLIST_NODE(&gw_node->list); gw_node->orig_node = orig_node; - kref_init(&gw_node->refcount); + atomic_set(&gw_node->refcount, 1); spin_lock_bh(&bat_priv->gw_list_lock); hlist_add_head_rcu(&gw_node->list, &bat_priv->gw_list); @@ -325,7 +320,7 @@ void gw_node_purge(struct bat_priv *bat_priv) gw_deselect(bat_priv); hlist_del_rcu(&gw_node->list); - call_rcu(&gw_node->rcu, gw_node_free_rcu); + gw_node_free_ref(gw_node); } diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 084604a..cfbeb45 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -98,7 +98,7 @@ struct gw_node { struct hlist_node list; struct orig_node *orig_node; unsigned long deleted; - struct kref refcount; + atomic_t refcount; struct rcu_head rcu; }; -- cgit v1.1 From 7d2b554826195372764910da2f0dcb0d9b869108 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Thu, 10 Feb 2011 14:33:50 +0000 Subject: batman-adv: Correct rcu refcounting for softif_neigh It might be possible that 2 threads access the same data in the same rcu grace period. The first thread calls call_rcu() to decrement the refcount and free the data while the second thread increases the refcount to use the data. To avoid this race condition all refcount operations have to be atomic. 
Reported-by: Sven Eckelmann Signed-off-by: Marek Lindner --- net/batman-adv/soft-interface.c | 31 +++++++++++++++---------------- net/batman-adv/types.h | 2 +- 2 files changed, 16 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 7e37077..152beaa 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -76,20 +76,18 @@ int my_skb_head_push(struct sk_buff *skb, unsigned int len) return 0; } -static void softif_neigh_free_ref(struct kref *refcount) +static void softif_neigh_free_rcu(struct rcu_head *rcu) { struct softif_neigh *softif_neigh; - softif_neigh = container_of(refcount, struct softif_neigh, refcount); + softif_neigh = container_of(rcu, struct softif_neigh, rcu); kfree(softif_neigh); } -static void softif_neigh_free_rcu(struct rcu_head *rcu) +static void softif_neigh_free_ref(struct softif_neigh *softif_neigh) { - struct softif_neigh *softif_neigh; - - softif_neigh = container_of(rcu, struct softif_neigh, rcu); - kref_put(&softif_neigh->refcount, softif_neigh_free_ref); + if (atomic_dec_and_test(&softif_neigh->refcount)) + call_rcu(&softif_neigh->rcu, softif_neigh_free_rcu); } void softif_neigh_purge(struct bat_priv *bat_priv) @@ -116,11 +114,10 @@ void softif_neigh_purge(struct bat_priv *bat_priv) softif_neigh->addr, softif_neigh->vid); softif_neigh_tmp = bat_priv->softif_neigh; bat_priv->softif_neigh = NULL; - kref_put(&softif_neigh_tmp->refcount, - softif_neigh_free_ref); + softif_neigh_free_ref(softif_neigh_tmp); } - call_rcu(&softif_neigh->rcu, softif_neigh_free_rcu); + softif_neigh_free_ref(softif_neigh); } spin_unlock_bh(&bat_priv->softif_neigh_lock); @@ -141,8 +138,11 @@ static struct softif_neigh *softif_neigh_get(struct bat_priv *bat_priv, if (softif_neigh->vid != vid) continue; + if (!atomic_inc_not_zero(&softif_neigh->refcount)) + continue; + softif_neigh->last_seen = jiffies; - goto found; + goto out; } softif_neigh = 
kzalloc(sizeof(struct softif_neigh), GFP_ATOMIC); @@ -152,15 +152,14 @@ static struct softif_neigh *softif_neigh_get(struct bat_priv *bat_priv, memcpy(softif_neigh->addr, addr, ETH_ALEN); softif_neigh->vid = vid; softif_neigh->last_seen = jiffies; - kref_init(&softif_neigh->refcount); + /* initialize with 2 - caller decrements counter by one */ + atomic_set(&softif_neigh->refcount, 2); INIT_HLIST_NODE(&softif_neigh->list); spin_lock_bh(&bat_priv->softif_neigh_lock); hlist_add_head_rcu(&softif_neigh->list, &bat_priv->softif_neigh_list); spin_unlock_bh(&bat_priv->softif_neigh_lock); -found: - kref_get(&softif_neigh->refcount); out: rcu_read_unlock(); return softif_neigh; @@ -264,7 +263,7 @@ static void softif_batman_recv(struct sk_buff *skb, struct net_device *dev, softif_neigh->addr, softif_neigh->vid); softif_neigh_tmp = bat_priv->softif_neigh; bat_priv->softif_neigh = softif_neigh; - kref_put(&softif_neigh_tmp->refcount, softif_neigh_free_ref); + softif_neigh_free_ref(softif_neigh_tmp); /* we need to hold the additional reference */ goto err; } @@ -282,7 +281,7 @@ static void softif_batman_recv(struct sk_buff *skb, struct net_device *dev, } out: - kref_put(&softif_neigh->refcount, softif_neigh_free_ref); + softif_neigh_free_ref(softif_neigh); err: kfree_skb(skb); return; diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index cfbeb45..96f7c22 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -268,7 +268,7 @@ struct softif_neigh { uint8_t addr[ETH_ALEN]; unsigned long last_seen; short vid; - struct kref refcount; + atomic_t refcount; struct rcu_head rcu; }; -- cgit v1.1 From ed75ccbe26f4a672a41556120390e67c80a2c441 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Thu, 10 Feb 2011 14:33:51 +0000 Subject: batman-adv: Correct rcu refcounting for batman_if It might be possible that 2 threads access the same data in the same rcu grace period. 
The first thread calls call_rcu() to decrement the refcount and free the data while the second thread increases the refcount to use the data. To avoid this race condition all refcount operations have to be atomic. Reported-by: Sven Eckelmann Signed-off-by: Marek Lindner --- net/batman-adv/bat_sysfs.c | 20 +++++++++----------- net/batman-adv/hard-interface.c | 40 +++++++++++++++++++--------------------- net/batman-adv/hard-interface.h | 9 ++++----- net/batman-adv/types.h | 2 +- 4 files changed, 33 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_sysfs.c b/net/batman-adv/bat_sysfs.c index f7b93a0..93ae20a 100644 --- a/net/batman-adv/bat_sysfs.c +++ b/net/batman-adv/bat_sysfs.c @@ -450,7 +450,7 @@ static ssize_t show_mesh_iface(struct kobject *kobj, struct attribute *attr, length = sprintf(buff, "%s\n", batman_if->if_status == IF_NOT_IN_USE ? "none" : batman_if->soft_iface->name); - kref_put(&batman_if->refcount, hardif_free_ref); + hardif_free_ref(batman_if); return length; } @@ -461,7 +461,7 @@ static ssize_t store_mesh_iface(struct kobject *kobj, struct attribute *attr, struct net_device *net_dev = kobj_to_netdev(kobj); struct batman_if *batman_if = get_batman_if_by_netdev(net_dev); int status_tmp = -1; - int ret; + int ret = count; if (!batman_if) return count; @@ -472,7 +472,7 @@ static ssize_t store_mesh_iface(struct kobject *kobj, struct attribute *attr, if (strlen(buff) >= IFNAMSIZ) { pr_err("Invalid parameter for 'mesh_iface' setting received: " "interface name too long '%s'\n", buff); - kref_put(&batman_if->refcount, hardif_free_ref); + hardif_free_ref(batman_if); return -EINVAL; } @@ -482,17 +482,14 @@ static ssize_t store_mesh_iface(struct kobject *kobj, struct attribute *attr, status_tmp = IF_I_WANT_YOU; if ((batman_if->if_status == status_tmp) || ((batman_if->soft_iface) && - (strncmp(batman_if->soft_iface->name, buff, IFNAMSIZ) == 0))) { - kref_put(&batman_if->refcount, hardif_free_ref); - return count; - } + 
(strncmp(batman_if->soft_iface->name, buff, IFNAMSIZ) == 0))) + goto out; if (status_tmp == IF_NOT_IN_USE) { rtnl_lock(); hardif_disable_interface(batman_if); rtnl_unlock(); - kref_put(&batman_if->refcount, hardif_free_ref); - return count; + goto out; } /* if the interface already is in use */ @@ -503,8 +500,9 @@ static ssize_t store_mesh_iface(struct kobject *kobj, struct attribute *attr, } ret = hardif_enable_interface(batman_if, buff); - kref_put(&batman_if->refcount, hardif_free_ref); +out: + hardif_free_ref(batman_if); return ret; } @@ -537,7 +535,7 @@ static ssize_t show_iface_status(struct kobject *kobj, struct attribute *attr, break; } - kref_put(&batman_if->refcount, hardif_free_ref); + hardif_free_ref(batman_if); return length; } diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index e2b001a..8982485 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -40,13 +40,13 @@ static int batman_skb_recv(struct sk_buff *skb, struct packet_type *ptype, struct net_device *orig_dev); -static void hardif_free_rcu(struct rcu_head *rcu) +void hardif_free_rcu(struct rcu_head *rcu) { struct batman_if *batman_if; batman_if = container_of(rcu, struct batman_if, rcu); dev_put(batman_if->net_dev); - kref_put(&batman_if->refcount, hardif_free_ref); + kfree(batman_if); } struct batman_if *get_batman_if_by_netdev(struct net_device *net_dev) @@ -55,16 +55,14 @@ struct batman_if *get_batman_if_by_netdev(struct net_device *net_dev) rcu_read_lock(); list_for_each_entry_rcu(batman_if, &if_list, list) { - if (batman_if->net_dev == net_dev) + if (batman_if->net_dev == net_dev && + atomic_inc_not_zero(&batman_if->refcount)) goto out; } batman_if = NULL; out: - if (batman_if) - kref_get(&batman_if->refcount); - rcu_read_unlock(); return batman_if; } @@ -105,16 +103,14 @@ static struct batman_if *get_active_batman_if(struct net_device *soft_iface) if (batman_if->soft_iface != soft_iface) continue; - if (batman_if->if_status 
== IF_ACTIVE) + if (batman_if->if_status == IF_ACTIVE && + atomic_inc_not_zero(&batman_if->refcount)) goto out; } batman_if = NULL; out: - if (batman_if) - kref_get(&batman_if->refcount); - rcu_read_unlock(); return batman_if; } @@ -137,14 +133,14 @@ static void set_primary_if(struct bat_priv *bat_priv, struct batman_packet *batman_packet; struct batman_if *old_if; - if (batman_if) - kref_get(&batman_if->refcount); + if (batman_if && !atomic_inc_not_zero(&batman_if->refcount)) + batman_if = NULL; old_if = bat_priv->primary_if; bat_priv->primary_if = batman_if; if (old_if) - kref_put(&old_if->refcount, hardif_free_ref); + hardif_free_ref(old_if); if (!bat_priv->primary_if) return; @@ -290,6 +286,9 @@ int hardif_enable_interface(struct batman_if *batman_if, char *iface_name) if (batman_if->if_status != IF_NOT_IN_USE) goto out; + if (!atomic_inc_not_zero(&batman_if->refcount)) + goto out; + batman_if->soft_iface = dev_get_by_name(&init_net, iface_name); if (!batman_if->soft_iface) { @@ -328,7 +327,6 @@ int hardif_enable_interface(struct batman_if *batman_if, char *iface_name) batman_if->batman_adv_ptype.type = __constant_htons(ETH_P_BATMAN); batman_if->batman_adv_ptype.func = batman_skb_recv; batman_if->batman_adv_ptype.dev = batman_if->net_dev; - kref_get(&batman_if->refcount); dev_add_pack(&batman_if->batman_adv_ptype); atomic_set(&batman_if->seqno, 1); @@ -371,6 +369,7 @@ out: return 0; err: + hardif_free_ref(batman_if); return -ENOMEM; } @@ -387,7 +386,6 @@ void hardif_disable_interface(struct batman_if *batman_if) bat_info(batman_if->soft_iface, "Removing interface: %s\n", batman_if->net_dev->name); dev_remove_pack(&batman_if->batman_adv_ptype); - kref_put(&batman_if->refcount, hardif_free_ref); bat_priv->num_ifaces--; orig_hash_del_if(batman_if, bat_priv->num_ifaces); @@ -399,7 +397,7 @@ void hardif_disable_interface(struct batman_if *batman_if) set_primary_if(bat_priv, new_if); if (new_if) - kref_put(&new_if->refcount, hardif_free_ref); + 
hardif_free_ref(new_if); } kfree(batman_if->packet_buff); @@ -416,6 +414,7 @@ void hardif_disable_interface(struct batman_if *batman_if) softif_destroy(batman_if->soft_iface); batman_if->soft_iface = NULL; + hardif_free_ref(batman_if); } static struct batman_if *hardif_add_interface(struct net_device *net_dev) @@ -445,7 +444,8 @@ static struct batman_if *hardif_add_interface(struct net_device *net_dev) batman_if->soft_iface = NULL; batman_if->if_status = IF_NOT_IN_USE; INIT_LIST_HEAD(&batman_if->list); - kref_init(&batman_if->refcount); + /* extra reference for return */ + atomic_set(&batman_if->refcount, 2); check_known_mac_addr(batman_if->net_dev); @@ -453,8 +453,6 @@ static struct batman_if *hardif_add_interface(struct net_device *net_dev) list_add_tail_rcu(&batman_if->list, &if_list); spin_unlock(&if_list_lock); - /* extra reference for return */ - kref_get(&batman_if->refcount); return batman_if; free_if: @@ -476,7 +474,7 @@ static void hardif_remove_interface(struct batman_if *batman_if) batman_if->if_status = IF_TO_BE_REMOVED; sysfs_del_hardif(&batman_if->hardif_obj); - call_rcu(&batman_if->rcu, hardif_free_rcu); + hardif_free_ref(batman_if); } void hardif_remove_interfaces(void) @@ -548,7 +546,7 @@ static int hard_if_event(struct notifier_block *this, }; hardif_put: - kref_put(&batman_if->refcount, hardif_free_ref); + hardif_free_ref(batman_if); out: return NOTIFY_DONE; } diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index ad19543..e488b90 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -37,13 +37,12 @@ void hardif_disable_interface(struct batman_if *batman_if); void hardif_remove_interfaces(void); int hardif_min_mtu(struct net_device *soft_iface); void update_min_mtu(struct net_device *soft_iface); +void hardif_free_rcu(struct rcu_head *rcu); -static inline void hardif_free_ref(struct kref *refcount) +static inline void hardif_free_ref(struct batman_if *batman_if) { - struct batman_if 
*batman_if; - - batman_if = container_of(refcount, struct batman_if, refcount); - kfree(batman_if); + if (atomic_dec_and_test(&batman_if->refcount)) + call_rcu(&batman_if->rcu, hardif_free_rcu); } #endif /* _NET_BATMAN_ADV_HARD_INTERFACE_H_ */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 96f7c22..e0140c6 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -43,7 +43,7 @@ struct batman_if { unsigned char *packet_buff; int packet_len; struct kobject *hardif_obj; - struct kref refcount; + atomic_t refcount; struct packet_type batman_adv_ptype; struct net_device *soft_iface; struct rcu_head rcu; -- cgit v1.1 From 0ede9f41b217d8982ab426e3c8c1b692a280a16f Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Tue, 25 Jan 2011 21:52:10 +0000 Subject: batman-adv: protect bit operations to count OGMs with spinlock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported-by: Linus Lüssing Signed-off-by: Marek Lindner --- net/batman-adv/routing.c | 61 ++++++++++++++++++++++++------------------------ net/batman-adv/types.h | 6 ++--- 2 files changed, 33 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 9185666..29a689a 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -155,7 +155,8 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; struct hlist_node *node; unsigned char total_count; - int ret = 0; + uint8_t orig_eq_count, neigh_rq_count, tq_own; + int tq_asym_penalty, ret = 0; if (orig_node == orig_neigh_node) { rcu_read_lock(); @@ -216,23 +217,25 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, orig_node->last_valid = jiffies; + spin_lock_bh(&orig_node->ogm_cnt_lock); + orig_eq_count = orig_neigh_node->bcast_own_sum[if_incoming->if_num]; + neigh_rq_count = neigh_node->real_packet_count; + 
spin_unlock_bh(&orig_node->ogm_cnt_lock); + /* pay attention to not get a value bigger than 100 % */ - total_count = (orig_neigh_node->bcast_own_sum[if_incoming->if_num] > - neigh_node->real_packet_count ? - neigh_node->real_packet_count : - orig_neigh_node->bcast_own_sum[if_incoming->if_num]); + total_count = (orig_eq_count > neigh_rq_count ? + neigh_rq_count : orig_eq_count); /* if we have too few packets (too less data) we set tq_own to zero */ /* if we receive too few packets it is not considered bidirectional */ if ((total_count < TQ_LOCAL_BIDRECT_SEND_MINIMUM) || - (neigh_node->real_packet_count < TQ_LOCAL_BIDRECT_RECV_MINIMUM)) - orig_neigh_node->tq_own = 0; + (neigh_rq_count < TQ_LOCAL_BIDRECT_RECV_MINIMUM)) + tq_own = 0; else /* neigh_node->real_packet_count is never zero as we * only purge old information when getting new * information */ - orig_neigh_node->tq_own = (TQ_MAX_VALUE * total_count) / - neigh_node->real_packet_count; + tq_own = (TQ_MAX_VALUE * total_count) / neigh_rq_count; /* * 1 - ((1-x) ** 3), normalized to TQ_MAX_VALUE this does @@ -240,20 +243,16 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, * punishes asymmetric links more. 
This will give a value * between 0 and TQ_MAX_VALUE */ - orig_neigh_node->tq_asym_penalty = - TQ_MAX_VALUE - - (TQ_MAX_VALUE * - (TQ_LOCAL_WINDOW_SIZE - neigh_node->real_packet_count) * - (TQ_LOCAL_WINDOW_SIZE - neigh_node->real_packet_count) * - (TQ_LOCAL_WINDOW_SIZE - neigh_node->real_packet_count)) / - (TQ_LOCAL_WINDOW_SIZE * - TQ_LOCAL_WINDOW_SIZE * - TQ_LOCAL_WINDOW_SIZE); - - batman_packet->tq = ((batman_packet->tq * - orig_neigh_node->tq_own * - orig_neigh_node->tq_asym_penalty) / - (TQ_MAX_VALUE * TQ_MAX_VALUE)); + tq_asym_penalty = TQ_MAX_VALUE - (TQ_MAX_VALUE * + (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) * + (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count) * + (TQ_LOCAL_WINDOW_SIZE - neigh_rq_count)) / + (TQ_LOCAL_WINDOW_SIZE * + TQ_LOCAL_WINDOW_SIZE * + TQ_LOCAL_WINDOW_SIZE); + + batman_packet->tq = ((batman_packet->tq * tq_own * tq_asym_penalty) / + (TQ_MAX_VALUE * TQ_MAX_VALUE)); bat_dbg(DBG_BATMAN, bat_priv, "bidirectional: " @@ -261,8 +260,7 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, "real recv = %2i, local tq: %3i, asym_penalty: %3i, " "total tq: %3i\n", orig_node->orig, orig_neigh_node->orig, total_count, - neigh_node->real_packet_count, orig_neigh_node->tq_own, - orig_neigh_node->tq_asym_penalty, batman_packet->tq); + neigh_rq_count, tq_own, tq_asym_penalty, batman_packet->tq); /* if link has the minimum required transmission quality * consider it bidirectional */ @@ -559,18 +557,19 @@ static char count_real_packets(struct ethhdr *ethhdr, char is_duplicate = 0; int32_t seq_diff; int need_update = 0; - int set_mark; + int set_mark, ret = -1; orig_node = get_orig_node(bat_priv, batman_packet->orig); if (!orig_node) return 0; + spin_lock_bh(&orig_node->ogm_cnt_lock); seq_diff = batman_packet->seqno - orig_node->last_real_seqno; /* signalize caller that the packet is to be dropped. 
*/ if (window_protected(bat_priv, seq_diff, &orig_node->batman_seqno_reset)) - goto err; + goto out; rcu_read_lock(); hlist_for_each_entry_rcu(tmp_neigh_node, node, @@ -603,12 +602,12 @@ static char count_real_packets(struct ethhdr *ethhdr, orig_node->last_real_seqno = batman_packet->seqno; } - kref_put(&orig_node->refcount, orig_node_free_ref); - return is_duplicate; + ret = is_duplicate; -err: +out: + spin_unlock_bh(&orig_node->ogm_cnt_lock); kref_put(&orig_node->refcount, orig_node_free_ref); - return -1; + return ret; } void receive_bat_packet(struct ethhdr *ethhdr, diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index e0140c6..9a14276 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -70,8 +70,6 @@ struct orig_node { struct neigh_node *router; unsigned long *bcast_own; uint8_t *bcast_own_sum; - uint8_t tq_own; - int tq_asym_penalty; unsigned long last_valid; unsigned long bcast_seqno_reset; unsigned long batman_seqno_reset; @@ -89,7 +87,9 @@ struct orig_node { struct kref refcount; struct bat_priv *bat_priv; unsigned long last_frag_packet; - spinlock_t ogm_cnt_lock; /* protects ogm counter */ + spinlock_t ogm_cnt_lock; /* protects: bcast_own, bcast_own_sum, + * neigh_node->real_bits, + * neigh_node->real_packet_count */ atomic_t bond_candidates; struct list_head bond_list; }; -- cgit v1.1 From f3e0008f01b275bd08bd416cfcaa7021dd6bc277 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Tue, 25 Jan 2011 21:52:11 +0000 Subject: batman-adv: make broadcast seqno operations atomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Batman-adv could receive several payload broadcasts at the same time that would trigger access to the broadcast seqno sliding window to determine whether this is a new broadcast or not. If these incoming broadcasts are accessing the sliding window simultaneously it could be left in an inconsistent state. Therefore it is necessary to make sure this access is atomic. 
Reported-by: Linus Lüssing Signed-off-by: Marek Lindner --- net/batman-adv/originator.c | 1 + net/batman-adv/routing.c | 56 +++++++++++++++++++++++++++------------------ net/batman-adv/types.h | 2 ++ 3 files changed, 37 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 61299da..d9a8e31 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -215,6 +215,7 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr) INIT_HLIST_HEAD(&orig_node->neigh_list); INIT_LIST_HEAD(&orig_node->bond_list); spin_lock_init(&orig_node->ogm_cnt_lock); + spin_lock_init(&orig_node->bcast_seqno_lock); spin_lock_init(&orig_node->neigh_list_lock); kref_init(&orig_node->refcount); diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 29a689a..ce68815 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -1473,81 +1473,93 @@ int recv_ucast_frag_packet(struct sk_buff *skb, struct batman_if *recv_if) int recv_bcast_packet(struct sk_buff *skb, struct batman_if *recv_if) { struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); - struct orig_node *orig_node; + struct orig_node *orig_node = NULL; struct bcast_packet *bcast_packet; struct ethhdr *ethhdr; int hdr_size = sizeof(struct bcast_packet); + int ret = NET_RX_DROP; int32_t seq_diff; /* drop packet if it has not necessary minimum size */ if (unlikely(!pskb_may_pull(skb, hdr_size))) - return NET_RX_DROP; + goto out; ethhdr = (struct ethhdr *)skb_mac_header(skb); /* packet with broadcast indication but unicast recipient */ if (!is_broadcast_ether_addr(ethhdr->h_dest)) - return NET_RX_DROP; + goto out; /* packet with broadcast sender address */ if (is_broadcast_ether_addr(ethhdr->h_source)) - return NET_RX_DROP; + goto out; /* ignore broadcasts sent by myself */ if (is_my_mac(ethhdr->h_source)) - return NET_RX_DROP; + goto out; bcast_packet = (struct bcast_packet *)skb->data; /* ignore 
broadcasts originated by myself */ if (is_my_mac(bcast_packet->orig)) - return NET_RX_DROP; + goto out; if (bcast_packet->ttl < 2) - return NET_RX_DROP; + goto out; spin_lock_bh(&bat_priv->orig_hash_lock); rcu_read_lock(); orig_node = ((struct orig_node *) hash_find(bat_priv->orig_hash, compare_orig, choose_orig, bcast_packet->orig)); + + if (!orig_node) + goto rcu_unlock; + + kref_get(&orig_node->refcount); rcu_read_unlock(); - if (!orig_node) { - spin_unlock_bh(&bat_priv->orig_hash_lock); - return NET_RX_DROP; - } + spin_lock_bh(&orig_node->bcast_seqno_lock); /* check whether the packet is a duplicate */ - if (get_bit_status(orig_node->bcast_bits, - orig_node->last_bcast_seqno, - ntohl(bcast_packet->seqno))) { - spin_unlock_bh(&bat_priv->orig_hash_lock); - return NET_RX_DROP; - } + if (get_bit_status(orig_node->bcast_bits, orig_node->last_bcast_seqno, + ntohl(bcast_packet->seqno))) + goto spin_unlock; seq_diff = ntohl(bcast_packet->seqno) - orig_node->last_bcast_seqno; /* check whether the packet is old and the host just restarted. */ if (window_protected(bat_priv, seq_diff, - &orig_node->bcast_seqno_reset)) { - spin_unlock_bh(&bat_priv->orig_hash_lock); - return NET_RX_DROP; - } + &orig_node->bcast_seqno_reset)) + goto spin_unlock; /* mark broadcast in flood history, update window position * if required. 
*/ if (bit_get_packet(bat_priv, orig_node->bcast_bits, seq_diff, 1)) orig_node->last_bcast_seqno = ntohl(bcast_packet->seqno); + spin_unlock_bh(&orig_node->bcast_seqno_lock); spin_unlock_bh(&bat_priv->orig_hash_lock); + /* rebroadcast packet */ add_bcast_packet_to_list(bat_priv, skb); /* broadcast for me */ interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size); + ret = NET_RX_SUCCESS; + goto out; - return NET_RX_SUCCESS; +rcu_unlock: + rcu_read_unlock(); + spin_unlock_bh(&bat_priv->orig_hash_lock); + goto out; +spin_unlock: + spin_unlock_bh(&orig_node->bcast_seqno_lock); + spin_unlock_bh(&bat_priv->orig_hash_lock); +out: + if (orig_node) + kref_put(&orig_node->refcount, orig_node_free_ref); + return ret; } int recv_vis_packet(struct sk_buff *skb, struct batman_if *recv_if) diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 9a14276..e1f3e5e 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -90,6 +90,8 @@ struct orig_node { spinlock_t ogm_cnt_lock; /* protects: bcast_own, bcast_own_sum, * neigh_node->real_bits, * neigh_node->real_packet_count */ + spinlock_t bcast_seqno_lock; /* protects bcast_bits, + * last_bcast_seqno */ atomic_t bond_candidates; struct list_head bond_list; }; -- cgit v1.1 From 5d02b3cdfafeb23ab7cf43ef1d2118007370e8d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Sun, 13 Feb 2011 21:13:02 +0000 Subject: batman-adv: Make bat_priv->curr_gw an rcu protected pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The rcu protected macros rcu_dereference() and rcu_assign_pointer() for the bat_priv->curr_gw need to be used, as well as spin/rcu locking. Otherwise we might end up using a curr_gw pointer pointing to already freed memory. 
Reported-by: Sven Eckelmann Signed-off-by: Linus Lüssing Signed-off-by: Marek Lindner --- net/batman-adv/gateway_client.c | 100 ++++++++++++++++++++++++++++------------ net/batman-adv/types.h | 4 +- 2 files changed, 72 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 517e001..a3e842f 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -44,19 +44,29 @@ static void gw_node_free_ref(struct gw_node *gw_node) void *gw_get_selected(struct bat_priv *bat_priv) { - struct gw_node *curr_gateway_tmp = bat_priv->curr_gw; + struct gw_node *curr_gateway_tmp; + struct orig_node *orig_node = NULL; + rcu_read_lock(); + curr_gateway_tmp = rcu_dereference(bat_priv->curr_gw); if (!curr_gateway_tmp) - return NULL; + goto out; + + orig_node = curr_gateway_tmp->orig_node; - return curr_gateway_tmp->orig_node; +out: + rcu_read_unlock(); + return orig_node; } void gw_deselect(struct bat_priv *bat_priv) { - struct gw_node *gw_node = bat_priv->curr_gw; + struct gw_node *gw_node; - bat_priv->curr_gw = NULL; + spin_lock_bh(&bat_priv->gw_list_lock); + gw_node = rcu_dereference(bat_priv->curr_gw); + rcu_assign_pointer(bat_priv->curr_gw, NULL); + spin_unlock_bh(&bat_priv->gw_list_lock); if (gw_node) gw_node_free_ref(gw_node); @@ -64,12 +74,15 @@ void gw_deselect(struct bat_priv *bat_priv) static void gw_select(struct bat_priv *bat_priv, struct gw_node *new_gw_node) { - struct gw_node *curr_gw_node = bat_priv->curr_gw; + struct gw_node *curr_gw_node; if (new_gw_node && !atomic_inc_not_zero(&new_gw_node->refcount)) new_gw_node = NULL; - bat_priv->curr_gw = new_gw_node; + spin_lock_bh(&bat_priv->gw_list_lock); + curr_gw_node = rcu_dereference(bat_priv->curr_gw); + rcu_assign_pointer(bat_priv->curr_gw, new_gw_node); + spin_unlock_bh(&bat_priv->gw_list_lock); if (curr_gw_node) gw_node_free_ref(curr_gw_node); @@ -78,7 +91,7 @@ static void gw_select(struct bat_priv *bat_priv, struct 
gw_node *new_gw_node) void gw_election(struct bat_priv *bat_priv) { struct hlist_node *node; - struct gw_node *gw_node, *curr_gw_tmp = NULL; + struct gw_node *gw_node, *curr_gw, *curr_gw_tmp = NULL; uint8_t max_tq = 0; uint32_t max_gw_factor = 0, tmp_gw_factor = 0; int down, up; @@ -92,19 +105,23 @@ void gw_election(struct bat_priv *bat_priv) if (atomic_read(&bat_priv->gw_mode) != GW_MODE_CLIENT) return; - if (bat_priv->curr_gw) + rcu_read_lock(); + curr_gw = rcu_dereference(bat_priv->curr_gw); + if (curr_gw) { + rcu_read_unlock(); return; + } - rcu_read_lock(); if (hlist_empty(&bat_priv->gw_list)) { - rcu_read_unlock(); - if (bat_priv->curr_gw) { + if (curr_gw) { + rcu_read_unlock(); bat_dbg(DBG_BATMAN, bat_priv, "Removing selected gateway - " "no gateway in range\n"); gw_deselect(bat_priv); - } + } else + rcu_read_unlock(); return; } @@ -153,12 +170,12 @@ void gw_election(struct bat_priv *bat_priv) max_gw_factor = tmp_gw_factor; } - if (bat_priv->curr_gw != curr_gw_tmp) { - if ((bat_priv->curr_gw) && (!curr_gw_tmp)) + if (curr_gw != curr_gw_tmp) { + if ((curr_gw) && (!curr_gw_tmp)) bat_dbg(DBG_BATMAN, bat_priv, "Removing selected gateway - " "no gateway in range\n"); - else if ((!bat_priv->curr_gw) && (curr_gw_tmp)) + else if ((!curr_gw) && (curr_gw_tmp)) bat_dbg(DBG_BATMAN, bat_priv, "Adding route to gateway %pM " "(gw_flags: %i, tq: %i)\n", @@ -181,31 +198,35 @@ void gw_election(struct bat_priv *bat_priv) void gw_check_election(struct bat_priv *bat_priv, struct orig_node *orig_node) { - struct gw_node *curr_gateway_tmp = bat_priv->curr_gw; + struct gw_node *curr_gateway_tmp; uint8_t gw_tq_avg, orig_tq_avg; + rcu_read_lock(); + curr_gateway_tmp = rcu_dereference(bat_priv->curr_gw); if (!curr_gateway_tmp) - return; + goto out_rcu; if (!curr_gateway_tmp->orig_node) - goto deselect; + goto deselect_rcu; if (!curr_gateway_tmp->orig_node->router) - goto deselect; + goto deselect_rcu; /* this node already is the gateway */ if (curr_gateway_tmp->orig_node == orig_node) 
- return; + goto out_rcu; if (!orig_node->router) - return; + goto out_rcu; gw_tq_avg = curr_gateway_tmp->orig_node->router->tq_avg; + rcu_read_unlock(); + orig_tq_avg = orig_node->router->tq_avg; /* the TQ value has to be better */ if (orig_tq_avg < gw_tq_avg) - return; + goto out; /** * if the routing class is greater than 3 the value tells us how much @@ -213,15 +234,23 @@ void gw_check_election(struct bat_priv *bat_priv, struct orig_node *orig_node) **/ if ((atomic_read(&bat_priv->gw_sel_class) > 3) && (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw_sel_class))) - return; + goto out; bat_dbg(DBG_BATMAN, bat_priv, "Restarting gateway selection: better gateway found (tq curr: " "%i, tq new: %i)\n", gw_tq_avg, orig_tq_avg); + goto deselect; +out_rcu: + rcu_read_unlock(); + goto out; +deselect_rcu: + rcu_read_unlock(); deselect: gw_deselect(bat_priv); +out: + return; } static void gw_node_add(struct bat_priv *bat_priv, @@ -278,7 +307,7 @@ void gw_node_update(struct bat_priv *bat_priv, "Gateway %pM removed from gateway list\n", orig_node->orig); - if (gw_node == bat_priv->curr_gw) { + if (gw_node == rcu_dereference(bat_priv->curr_gw)) { rcu_read_unlock(); gw_deselect(bat_priv); return; @@ -316,7 +345,7 @@ void gw_node_purge(struct bat_priv *bat_priv) atomic_read(&bat_priv->mesh_state) == MESH_ACTIVE) continue; - if (bat_priv->curr_gw == gw_node) + if (rcu_dereference(bat_priv->curr_gw) == gw_node) gw_deselect(bat_priv); hlist_del_rcu(&gw_node->list); @@ -330,12 +359,16 @@ void gw_node_purge(struct bat_priv *bat_priv) static int _write_buffer_text(struct bat_priv *bat_priv, struct seq_file *seq, struct gw_node *gw_node) { - int down, up; + struct gw_node *curr_gw; + int down, up, ret; gw_bandwidth_to_kbit(gw_node->orig_node->gw_flags, &down, &up); - return seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %3i - %i%s/%i%s\n", - (bat_priv->curr_gw == gw_node ? 
"=>" : " "), + rcu_read_lock(); + curr_gw = rcu_dereference(bat_priv->curr_gw); + + ret = seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %3i - %i%s/%i%s\n", + (curr_gw == gw_node ? "=>" : " "), gw_node->orig_node->orig, gw_node->orig_node->router->tq_avg, gw_node->orig_node->router->addr, @@ -345,6 +378,9 @@ static int _write_buffer_text(struct bat_priv *bat_priv, (down > 2048 ? "MBit" : "KBit"), (up > 2048 ? up / 1024 : up), (up > 2048 ? "MBit" : "KBit")); + + rcu_read_unlock(); + return ret; } int gw_client_seq_print_text(struct seq_file *seq, void *offset) @@ -465,8 +501,12 @@ int gw_is_target(struct bat_priv *bat_priv, struct sk_buff *skb) if (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER) return -1; - if (!bat_priv->curr_gw) + rcu_read_lock(); + if (!rcu_dereference(bat_priv->curr_gw)) { + rcu_read_unlock(); return 0; + } + rcu_read_unlock(); return 1; } diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index e1f3e5e..3dd5e77 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -162,7 +162,7 @@ struct bat_priv { spinlock_t forw_bcast_list_lock; /* protects */ spinlock_t hna_lhash_lock; /* protects hna_local_hash */ spinlock_t hna_ghash_lock; /* protects hna_global_hash */ - spinlock_t gw_list_lock; /* protects gw_list */ + spinlock_t gw_list_lock; /* protects gw_list and curr_gw */ spinlock_t vis_hash_lock; /* protects vis_hash */ spinlock_t vis_list_lock; /* protects vis_info::recv_list */ spinlock_t softif_neigh_lock; /* protects soft-interface neigh list */ @@ -171,7 +171,7 @@ struct bat_priv { struct delayed_work hna_work; struct delayed_work orig_work; struct delayed_work vis_work; - struct gw_node *curr_gw; + struct gw_node __rcu *curr_gw; /* rcu protected pointer */ struct vis_info *my_vis_info; }; -- cgit v1.1 From 43c70ad5ce5691cce24dae6610731694c0f3fcc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Sun, 13 Feb 2011 21:13:04 +0000 Subject: batman-adv: Increase orig_node refcount before releasing rcu 
read lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When unicast_send_skb() is increasing the orig_node's refcount another thread might have been freeing this orig_node already. We need to increase the refcount in the rcu read lock protected area to avoid that. Signed-off-by: Linus Lüssing Signed-off-by: Marek Lindner --- net/batman-adv/gateway_client.c | 3 +++ net/batman-adv/unicast.c | 5 ++--- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index a3e842f..41eba8a 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -54,6 +54,9 @@ void *gw_get_selected(struct bat_priv *bat_priv) orig_node = curr_gateway_tmp->orig_node; + if (orig_node) + kref_get(&orig_node->refcount); + out: rcu_read_unlock(); return orig_node; diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 7ca994c..0603cea 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -293,10 +293,9 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv) spin_lock_bh(&bat_priv->orig_hash_lock); /* get routing information */ - if (is_multicast_ether_addr(ethhdr->h_dest)) + if (is_multicast_ether_addr(ethhdr->h_dest)) { orig_node = (struct orig_node *)gw_get_selected(bat_priv); - if (orig_node) { - kref_get(&orig_node->refcount); + if (orig_node) goto find_router; } -- cgit v1.1 From 9e0b33c221f1364e4d7562177a918eef8e85317a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Fri, 18 Feb 2011 12:20:13 +0000 Subject: batman-adv: Fix possible buffer overflow in softif neigh list output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When printing the soft interface table the number of entries in the softif neigh list are first being counted and a fitting buffer allocated. 
After that the softif neigh list gets locked again and the buffer printed - which has the following two issues: For one thing, the softif neigh list might have grown when reacquiring the rcu lock, which results in writing outside of the allocated buffer. Furthermore 31 Bytes are not enough for printing an entry with a vid of more than 2 digits. The manual buffering is unnecessary, we can safely print to the seq directly during the rcu_read_lock(). Signed-off-by: Linus Lüssing Signed-off-by: Marek Lindner --- net/batman-adv/soft-interface.c | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) (limited to 'net') diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 152beaa..c30ccd6 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -171,8 +171,6 @@ int softif_neigh_seq_print_text(struct seq_file *seq, void *offset) struct bat_priv *bat_priv = netdev_priv(net_dev); struct softif_neigh *softif_neigh; struct hlist_node *node; - size_t buf_size, pos; - char *buff; if (!bat_priv->primary_if) { return seq_printf(seq, "BATMAN mesh %s disabled - " @@ -182,33 +180,15 @@ int softif_neigh_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "Softif neighbor list (%s)\n", net_dev->name); - buf_size = 1; - /* Estimate length for: " xx:xx:xx:xx:xx:xx\n" */ rcu_read_lock(); hlist_for_each_entry_rcu(softif_neigh, node, &bat_priv->softif_neigh_list, list) - buf_size += 30; - rcu_read_unlock(); - - buff = kmalloc(buf_size, GFP_ATOMIC); - if (!buff) - return -ENOMEM; - - buff[0] = '\0'; - pos = 0; - - rcu_read_lock(); - hlist_for_each_entry_rcu(softif_neigh, node, - &bat_priv->softif_neigh_list, list) { - pos += snprintf(buff + pos, 31, "%s %pM (vid: %d)\n", + seq_printf(seq, "%s %pM (vid: %d)\n", bat_priv->softif_neigh == softif_neigh ? 
"=>" : " ", softif_neigh->addr, softif_neigh->vid); - } rcu_read_unlock(); - seq_printf(seq, "%s", buff); - kfree(buff); return 0; } -- cgit v1.1 From 39901e716275da4e831b40f9e45a1b61d6a776dc Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Fri, 18 Feb 2011 12:28:08 +0000 Subject: batman-adv: separate ethernet comparing calls from hash functions Note: The function compare_ether_addr() provided by the Linux kernel requires aligned memory. Signed-off-by: Marek Lindner --- net/batman-adv/hard-interface.c | 2 +- net/batman-adv/main.c | 2 +- net/batman-adv/main.h | 10 +++++++ net/batman-adv/routing.c | 55 +++++++++++++++++++------------------- net/batman-adv/send.c | 2 +- net/batman-adv/soft-interface.c | 2 +- net/batman-adv/translation-table.c | 2 +- net/batman-adv/vis.c | 18 ++++++------- 8 files changed, 51 insertions(+), 42 deletions(-) (limited to 'net') diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 8982485..de9bd36 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -187,7 +187,7 @@ static void check_known_mac_addr(struct net_device *net_dev) if (batman_if->net_dev == net_dev) continue; - if (!compare_orig(batman_if->net_dev->dev_addr, + if (!compare_eth(batman_if->net_dev->dev_addr, net_dev->dev_addr)) continue; diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 06d956c..3f977ea 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -161,7 +161,7 @@ int is_my_mac(uint8_t *addr) if (batman_if->if_status != IF_ACTIVE) continue; - if (compare_orig(batman_if->net_dev->dev_addr, addr)) { + if (compare_eth(batman_if->net_dev->dev_addr, addr)) { rcu_read_unlock(); return 1; } diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index e235d7b..06b5b99 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -165,4 +165,14 @@ static inline void bat_dbg(char type __always_unused, pr_err("%s: " fmt, _netdev->name, ## arg); \ } while (0) +/** + * returns 1 if 
they are the same ethernet addr + * + * note: can't use compare_ether_addr() as it requires aligned memory + */ +static inline int compare_eth(void *data1, void *data2) +{ + return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); +} + #endif /* _NET_BATMAN_ADV_MAIN_H_ */ diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index ce68815..b54bf6e 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -163,8 +163,8 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, hlist_for_each_entry_rcu(tmp_neigh_node, node, &orig_node->neigh_list, list) { - if (compare_orig(tmp_neigh_node->addr, - orig_neigh_node->orig) && + if (compare_eth(tmp_neigh_node->addr, + orig_neigh_node->orig) && (tmp_neigh_node->if_incoming == if_incoming)) neigh_node = tmp_neigh_node; } @@ -192,8 +192,8 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, hlist_for_each_entry_rcu(tmp_neigh_node, node, &orig_neigh_node->neigh_list, list) { - if (compare_orig(tmp_neigh_node->addr, - orig_neigh_node->orig) && + if (compare_eth(tmp_neigh_node->addr, + orig_neigh_node->orig) && (tmp_neigh_node->if_incoming == if_incoming)) neigh_node = tmp_neigh_node; } @@ -304,8 +304,8 @@ static void bonding_candidate_add(struct orig_node *orig_node, spin_lock_bh(&orig_node->neigh_list_lock); /* only consider if it has the same primary address ... 
*/ - if (!compare_orig(orig_node->orig, - neigh_node->orig_node->primary_addr)) + if (!compare_eth(orig_node->orig, + neigh_node->orig_node->primary_addr)) goto candidate_del; if (!orig_node->router) @@ -334,7 +334,7 @@ static void bonding_candidate_add(struct orig_node *orig_node, continue; if ((neigh_node->if_incoming == tmp_neigh_node->if_incoming) || - (compare_orig(neigh_node->addr, tmp_neigh_node->addr))) { + (compare_eth(neigh_node->addr, tmp_neigh_node->addr))) { interference_candidate = 1; break; } @@ -394,7 +394,7 @@ static void update_orig(struct bat_priv *bat_priv, rcu_read_lock(); hlist_for_each_entry_rcu(tmp_neigh_node, node, &orig_node->neigh_list, list) { - if (compare_orig(tmp_neigh_node->addr, ethhdr->h_source) && + if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) && (tmp_neigh_node->if_incoming == if_incoming) && atomic_inc_not_zero(&tmp_neigh_node->refcount)) { if (neigh_node) @@ -579,7 +579,7 @@ static char count_real_packets(struct ethhdr *ethhdr, orig_node->last_real_seqno, batman_packet->seqno); - if (compare_orig(tmp_neigh_node->addr, ethhdr->h_source) && + if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) && (tmp_neigh_node->if_incoming == if_incoming)) set_mark = 1; else @@ -644,8 +644,8 @@ void receive_bat_packet(struct ethhdr *ethhdr, has_directlink_flag = (batman_packet->flags & DIRECTLINK ? 1 : 0); - is_single_hop_neigh = (compare_orig(ethhdr->h_source, - batman_packet->orig) ? 1 : 0); + is_single_hop_neigh = (compare_eth(ethhdr->h_source, + batman_packet->orig) ? 
1 : 0); bat_dbg(DBG_BATMAN, bat_priv, "Received BATMAN packet via NB: %pM, IF: %s [%pM] " @@ -665,19 +665,19 @@ void receive_bat_packet(struct ethhdr *ethhdr, if (batman_if->soft_iface != if_incoming->soft_iface) continue; - if (compare_orig(ethhdr->h_source, - batman_if->net_dev->dev_addr)) + if (compare_eth(ethhdr->h_source, + batman_if->net_dev->dev_addr)) is_my_addr = 1; - if (compare_orig(batman_packet->orig, - batman_if->net_dev->dev_addr)) + if (compare_eth(batman_packet->orig, + batman_if->net_dev->dev_addr)) is_my_orig = 1; - if (compare_orig(batman_packet->prev_sender, - batman_if->net_dev->dev_addr)) + if (compare_eth(batman_packet->prev_sender, + batman_if->net_dev->dev_addr)) is_my_oldorig = 1; - if (compare_orig(ethhdr->h_source, broadcast_addr)) + if (compare_eth(ethhdr->h_source, broadcast_addr)) is_broadcast = 1; } rcu_read_unlock(); @@ -717,8 +717,8 @@ void receive_bat_packet(struct ethhdr *ethhdr, /* if received seqno equals last send seqno save new * seqno for bidirectional check */ if (has_directlink_flag && - compare_orig(if_incoming->net_dev->dev_addr, - batman_packet->orig) && + compare_eth(if_incoming->net_dev->dev_addr, + batman_packet->orig) && (batman_packet->seqno - if_incoming_seqno + 2 == 0)) { offset = if_incoming->if_num * NUM_WORDS; @@ -765,11 +765,11 @@ void receive_bat_packet(struct ethhdr *ethhdr, /* avoid temporary routing loops */ if ((orig_node->router) && (orig_node->router->orig_node->router) && - (compare_orig(orig_node->router->addr, - batman_packet->prev_sender)) && - !(compare_orig(batman_packet->orig, batman_packet->prev_sender)) && - (compare_orig(orig_node->router->addr, - orig_node->router->orig_node->router->addr))) { + (compare_eth(orig_node->router->addr, + batman_packet->prev_sender)) && + !(compare_eth(batman_packet->orig, batman_packet->prev_sender)) && + (compare_eth(orig_node->router->addr, + orig_node->router->orig_node->router->addr))) { bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: ignoring all rebroadcast 
packets that " "may make me loop (sender: %pM)\n", ethhdr->h_source); @@ -1185,14 +1185,13 @@ struct neigh_node *find_router(struct bat_priv *bat_priv, /* if we have something in the primary_addr, we can search * for a potential bonding candidate. */ - if (memcmp(router_orig->primary_addr, zero_mac, ETH_ALEN) == 0) + if (compare_eth(router_orig->primary_addr, zero_mac)) goto return_router; /* find the orig_node which has the primary interface. might * even be the same as our router_orig in many cases */ - if (memcmp(router_orig->primary_addr, - router_orig->orig, ETH_ALEN) == 0) { + if (compare_eth(router_orig->primary_addr, router_orig->orig)) { primary_orig_node = router_orig; } else { primary_orig_node = hash_find(bat_priv->orig_hash, compare_orig, diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 8314276..f0232ad 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -326,7 +326,7 @@ void schedule_forward_packet(struct orig_node *orig_node, if ((orig_node->router) && (orig_node->router->tq_avg != 0)) { /* rebroadcast ogm of best ranking neighbor as is */ - if (!compare_orig(orig_node->router->addr, ethhdr->h_source)) { + if (!compare_eth(orig_node->router->addr, ethhdr->h_source)) { batman_packet->tq = orig_node->router->tq_avg; if (orig_node->router->last_ttl) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index c30ccd6..bea2dcf 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -132,7 +132,7 @@ static struct softif_neigh *softif_neigh_get(struct bat_priv *bat_priv, rcu_read_lock(); hlist_for_each_entry_rcu(softif_neigh, node, &bat_priv->softif_neigh_list, list) { - if (memcmp(softif_neigh->addr, addr, ETH_ALEN) != 0) + if (!compare_eth(softif_neigh->addr, addr)) continue; if (softif_neigh->vid != vid) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index b25e4b3..77d0ee0 100644 --- a/net/batman-adv/translation-table.c +++ 
b/net/batman-adv/translation-table.c @@ -101,7 +101,7 @@ void hna_local_add(struct net_device *soft_iface, uint8_t *addr) hna_local_entry->last_seen = jiffies; /* the batman interface mac address should never be purged */ - if (compare_orig(addr, soft_iface->dev_addr)) + if (compare_eth(addr, soft_iface->dev_addr)) hna_local_entry->never_purge = 1; else hna_local_entry->never_purge = 0; diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 9832d8f..fc97329 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -75,7 +75,7 @@ static int vis_info_cmp(void *data1, void *data2) d2 = data2; p1 = (struct vis_packet *)d1->skb_packet->data; p2 = (struct vis_packet *)d2->skb_packet->data; - return compare_orig(p1->vis_orig, p2->vis_orig); + return compare_eth(p1->vis_orig, p2->vis_orig); } /* hash function to choose an entry in a hash table of given size */ @@ -113,7 +113,7 @@ static void vis_data_insert_interface(const uint8_t *interface, struct hlist_node *pos; hlist_for_each_entry(entry, pos, if_list, list) { - if (compare_orig(entry->addr, (void *)interface)) + if (compare_eth(entry->addr, (void *)interface)) return; } @@ -165,7 +165,7 @@ static ssize_t vis_data_read_entry(char *buff, struct vis_info_entry *entry, /* maximal length: max(4+17+2, 3+17+1+3+2) == 26 */ if (primary && entry->quality == 0) return sprintf(buff, "HNA %pM, ", entry->dest); - else if (compare_orig(entry->src, src)) + else if (compare_eth(entry->src, src)) return sprintf(buff, "TQ %pM %d, ", entry->dest, entry->quality); @@ -212,7 +212,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) if (entries[j].quality == 0) continue; compare = - compare_orig(entries[j].src, packet->vis_orig); + compare_eth(entries[j].src, packet->vis_orig); vis_data_insert_interface(entries[j].src, &vis_if_list, compare); @@ -222,7 +222,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) buf_size += 18 + 26 * packet->entries; /* add primary/secondary records */ - if 
(compare_orig(entry->addr, packet->vis_orig)) + if (compare_eth(entry->addr, packet->vis_orig)) buf_size += vis_data_count_prim_sec(&vis_if_list); @@ -258,7 +258,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) if (entries[j].quality == 0) continue; compare = - compare_orig(entries[j].src, packet->vis_orig); + compare_eth(entries[j].src, packet->vis_orig); vis_data_insert_interface(entries[j].src, &vis_if_list, compare); @@ -276,7 +276,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) entry->primary); /* add primary/secondary records */ - if (compare_orig(entry->addr, packet->vis_orig)) + if (compare_eth(entry->addr, packet->vis_orig)) buff_pos += vis_data_read_prim_sec(buff + buff_pos, &vis_if_list); @@ -344,7 +344,7 @@ static int recv_list_is_in(struct bat_priv *bat_priv, spin_lock_bh(&bat_priv->vis_list_lock); list_for_each_entry(entry, recv_list, list) { - if (memcmp(entry->mac, mac, ETH_ALEN) == 0) { + if (compare_eth(entry->mac, mac)) { spin_unlock_bh(&bat_priv->vis_list_lock); return 1; } @@ -617,7 +617,7 @@ static int generate_vis_packet(struct bat_priv *bat_priv) if (!neigh_node) continue; - if (!compare_orig(neigh_node->addr, orig_node->orig)) + if (!compare_eth(neigh_node->addr, orig_node->orig)) continue; if (neigh_node->if_incoming->if_status != IF_ACTIVE) -- cgit v1.1 From 7aadf889e897155c45cda230d2a6701ad1fbff61 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Fri, 18 Feb 2011 12:28:09 +0000 Subject: batman-adv: remove extra layer between hash and hash element - hash bucket Signed-off-by: Marek Lindner --- net/batman-adv/hash.c | 8 -- net/batman-adv/hash.h | 95 +++++------------ net/batman-adv/icmp_socket.c | 5 +- net/batman-adv/originator.c | 73 ++++--------- net/batman-adv/originator.h | 35 ++++++- net/batman-adv/routing.c | 40 +++---- net/batman-adv/translation-table.c | 208 ++++++++++++++++++++++++------------- net/batman-adv/types.h | 4 + net/batman-adv/unicast.c | 21 ++-- net/batman-adv/vis.c | 103 
++++++++++-------- 10 files changed, 298 insertions(+), 294 deletions(-) (limited to 'net') diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c index 0265366..c5213d8 100644 --- a/net/batman-adv/hash.c +++ b/net/batman-adv/hash.c @@ -68,11 +68,3 @@ free_hash: kfree(hash); return NULL; } - -void bucket_free_rcu(struct rcu_head *rcu) -{ - struct element_t *bucket; - - bucket = container_of(rcu, struct element_t, rcu); - kfree(bucket); -} diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h index 3c48c6b..434822b 100644 --- a/net/batman-adv/hash.h +++ b/net/batman-adv/hash.h @@ -28,19 +28,13 @@ * compare 2 element datas for their keys, * return 0 if same and not 0 if not * same */ -typedef int (*hashdata_compare_cb)(void *, void *); +typedef int (*hashdata_compare_cb)(struct hlist_node *, void *); /* the hashfunction, should return an index * based on the key in the data of the first * argument and the size the second */ typedef int (*hashdata_choose_cb)(void *, int); -typedef void (*hashdata_free_cb)(void *, void *); - -struct element_t { - void *data; /* pointer to the data */ - struct hlist_node hlist; /* bucket list pointer */ - struct rcu_head rcu; -}; +typedef void (*hashdata_free_cb)(struct hlist_node *, void *); struct hashtable_t { struct hlist_head *table; /* the hashtable itself with the buckets */ @@ -54,8 +48,6 @@ struct hashtable_t *hash_new(int size); /* free only the hashtable and the hash itself. */ void hash_destroy(struct hashtable_t *hash); -void bucket_free_rcu(struct rcu_head *rcu); - /* remove the hash structure. if hashdata_free_cb != NULL, this function will be * called to remove the elements inside of the hash. if you don't remove the * elements, memory might be leaked. 
*/ @@ -63,8 +55,7 @@ static inline void hash_delete(struct hashtable_t *hash, hashdata_free_cb free_cb, void *arg) { struct hlist_head *head; - struct hlist_node *walk, *safe; - struct element_t *bucket; + struct hlist_node *node, *node_tmp; spinlock_t *list_lock; /* spinlock to protect write access */ int i; @@ -73,12 +64,11 @@ static inline void hash_delete(struct hashtable_t *hash, list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) { - if (free_cb) - free_cb(bucket->data, arg); + hlist_for_each_safe(node, node_tmp, head) { + hlist_del_rcu(node); - hlist_del_rcu(walk); - call_rcu(&bucket->rcu, bucket_free_rcu); + if (free_cb) + free_cb(node, arg); } spin_unlock_bh(list_lock); } @@ -89,12 +79,12 @@ static inline void hash_delete(struct hashtable_t *hash, /* adds data to the hashtable. returns 0 on success, -1 on error */ static inline int hash_add(struct hashtable_t *hash, hashdata_compare_cb compare, - hashdata_choose_cb choose, void *data) + hashdata_choose_cb choose, + void *data, struct hlist_node *data_node) { int index; struct hlist_head *head; - struct hlist_node *walk, *safe; - struct element_t *bucket; + struct hlist_node *node; spinlock_t *list_lock; /* spinlock to protect write access */ if (!hash) @@ -105,21 +95,17 @@ static inline int hash_add(struct hashtable_t *hash, list_lock = &hash->list_locks[index]; rcu_read_lock(); - hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) { - if (compare(bucket->data, data)) - goto err_unlock; + __hlist_for_each_rcu(node, head) { + if (!compare(node, data)) + continue; + + goto err_unlock; } rcu_read_unlock(); /* no duplicate found in list, add new element */ - bucket = kmalloc(sizeof(struct element_t), GFP_ATOMIC); - if (!bucket) - goto err; - - bucket->data = data; - spin_lock_bh(list_lock); - hlist_add_head_rcu(&bucket->hlist, head); + hlist_add_head_rcu(data_node, head); spin_unlock_bh(list_lock); return 0; @@ -139,8 +125,7 @@ static 
inline void *hash_remove(struct hashtable_t *hash, hashdata_choose_cb choose, void *data) { size_t index; - struct hlist_node *walk; - struct element_t *bucket; + struct hlist_node *node; struct hlist_head *head; void *data_save = NULL; @@ -148,49 +133,17 @@ static inline void *hash_remove(struct hashtable_t *hash, head = &hash->table[index]; spin_lock_bh(&hash->list_locks[index]); - hlist_for_each_entry(bucket, walk, head, hlist) { - if (compare(bucket->data, data)) { - data_save = bucket->data; - hlist_del_rcu(walk); - call_rcu(&bucket->rcu, bucket_free_rcu); - break; - } + hlist_for_each(node, head) { + if (!compare(node, data)) + continue; + + data_save = node; + hlist_del_rcu(node); + break; } spin_unlock_bh(&hash->list_locks[index]); return data_save; } -/** - * finds data, based on the key in keydata. returns the found data on success, - * or NULL on error - * - * caller must lock with rcu_read_lock() / rcu_read_unlock() - **/ -static inline void *hash_find(struct hashtable_t *hash, - hashdata_compare_cb compare, - hashdata_choose_cb choose, void *keydata) -{ - int index; - struct hlist_head *head; - struct hlist_node *walk; - struct element_t *bucket; - void *bucket_data = NULL; - - if (!hash) - return NULL; - - index = choose(keydata , hash->size); - head = &hash->table[index]; - - hlist_for_each_entry(bucket, walk, head, hlist) { - if (compare(bucket->data, keydata)) { - bucket_data = bucket->data; - break; - } - } - - return bucket_data; -} - #endif /* _NET_BATMAN_ADV_HASH_H_ */ diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 7fa5bb8..139b733 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -222,14 +222,11 @@ static ssize_t bat_socket_write(struct file *file, const char __user *buff, spin_lock_bh(&bat_priv->orig_hash_lock); rcu_read_lock(); - orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, - compare_orig, choose_orig, - icmp_packet->dst)); + orig_node = orig_hash_find(bat_priv, 
icmp_packet->dst); if (!orig_node) goto unlock; - kref_get(&orig_node->refcount); neigh_node = orig_node->router; if (!neigh_node) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index d9a8e31..bdcb399 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -140,9 +140,8 @@ void orig_node_free_ref(struct kref *refcount) void originator_free(struct bat_priv *bat_priv) { struct hashtable_t *hash = bat_priv->orig_hash; - struct hlist_node *walk, *safe; + struct hlist_node *node, *node_tmp; struct hlist_head *head; - struct element_t *bucket; spinlock_t *list_lock; /* spinlock to protect write access */ struct orig_node *orig_node; int i; @@ -160,11 +159,10 @@ void originator_free(struct bat_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) { - orig_node = bucket->data; + hlist_for_each_entry_safe(orig_node, node, node_tmp, + head, hash_entry) { - hlist_del_rcu(walk); - call_rcu(&bucket->rcu, bucket_free_rcu); + hlist_del_rcu(node); kref_put(&orig_node->refcount, orig_node_free_ref); } spin_unlock_bh(list_lock); @@ -174,18 +172,6 @@ void originator_free(struct bat_priv *bat_priv) spin_unlock_bh(&bat_priv->orig_hash_lock); } -static void bucket_free_orig_rcu(struct rcu_head *rcu) -{ - struct element_t *bucket; - struct orig_node *orig_node; - - bucket = container_of(rcu, struct element_t, rcu); - orig_node = bucket->data; - - kref_put(&orig_node->refcount, orig_node_free_ref); - kfree(bucket); -} - /* this function finds or creates an originator entry for the given * address if it does not exits */ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr) @@ -194,16 +180,9 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr) int size; int hash_added; - rcu_read_lock(); - orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, - compare_orig, choose_orig, - addr)); - rcu_read_unlock(); - - if 
(orig_node) { - kref_get(&orig_node->refcount); + orig_node = orig_hash_find(bat_priv, addr); + if (orig_node) return orig_node; - } bat_dbg(DBG_BATMAN, bat_priv, "Creating new originator: %pM\n", addr); @@ -245,8 +224,8 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr) if (!orig_node->bcast_own_sum) goto free_bcast_own; - hash_added = hash_add(bat_priv->orig_hash, compare_orig, choose_orig, - orig_node); + hash_added = hash_add(bat_priv->orig_hash, compare_orig, + choose_orig, orig_node, &orig_node->hash_entry); if (hash_added < 0) goto free_bcast_own_sum; @@ -346,9 +325,8 @@ static bool purge_orig_node(struct bat_priv *bat_priv, static void _purge_orig(struct bat_priv *bat_priv) { struct hashtable_t *hash = bat_priv->orig_hash; - struct hlist_node *walk, *safe; + struct hlist_node *node, *node_tmp; struct hlist_head *head; - struct element_t *bucket; spinlock_t *list_lock; /* spinlock to protect write access */ struct orig_node *orig_node; int i; @@ -364,14 +342,14 @@ static void _purge_orig(struct bat_priv *bat_priv) list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); - hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) { - orig_node = bucket->data; - + hlist_for_each_entry_safe(orig_node, node, node_tmp, + head, hash_entry) { if (purge_orig_node(bat_priv, orig_node)) { if (orig_node->gw_flags) gw_node_delete(bat_priv, orig_node); - hlist_del_rcu(walk); - call_rcu(&bucket->rcu, bucket_free_orig_rcu); + hlist_del_rcu(node); + kref_put(&orig_node->refcount, + orig_node_free_ref); continue; } @@ -411,9 +389,8 @@ int orig_seq_print_text(struct seq_file *seq, void *offset) struct net_device *net_dev = (struct net_device *)seq->private; struct bat_priv *bat_priv = netdev_priv(net_dev); struct hashtable_t *hash = bat_priv->orig_hash; - struct hlist_node *walk, *node; + struct hlist_node *node, *node_tmp; struct hlist_head *head; - struct element_t *bucket; struct orig_node *orig_node; struct neigh_node *neigh_node; int 
batman_count = 0; @@ -447,9 +424,7 @@ int orig_seq_print_text(struct seq_file *seq, void *offset) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(bucket, walk, head, hlist) { - orig_node = bucket->data; - + hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { if (!orig_node->router) continue; @@ -468,7 +443,7 @@ int orig_seq_print_text(struct seq_file *seq, void *offset) neigh_node->addr, neigh_node->if_incoming->net_dev->name); - hlist_for_each_entry_rcu(neigh_node, node, + hlist_for_each_entry_rcu(neigh_node, node_tmp, &orig_node->neigh_list, list) { seq_printf(seq, " %pM (%3i)", neigh_node->addr, neigh_node->tq_avg); @@ -522,9 +497,8 @@ int orig_hash_add_if(struct batman_if *batman_if, int max_if_num) { struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); struct hashtable_t *hash = bat_priv->orig_hash; - struct hlist_node *walk; + struct hlist_node *node; struct hlist_head *head; - struct element_t *bucket; struct orig_node *orig_node; int i, ret; @@ -536,9 +510,7 @@ int orig_hash_add_if(struct batman_if *batman_if, int max_if_num) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(bucket, walk, head, hlist) { - orig_node = bucket->data; - + hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { spin_lock_bh(&orig_node->ogm_cnt_lock); ret = orig_node_add_if(orig_node, max_if_num); spin_unlock_bh(&orig_node->ogm_cnt_lock); @@ -614,9 +586,8 @@ int orig_hash_del_if(struct batman_if *batman_if, int max_if_num) { struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); struct hashtable_t *hash = bat_priv->orig_hash; - struct hlist_node *walk; + struct hlist_node *node; struct hlist_head *head; - struct element_t *bucket; struct batman_if *batman_if_tmp; struct orig_node *orig_node; int i, ret; @@ -629,9 +600,7 @@ int orig_hash_del_if(struct batman_if *batman_if, int max_if_num) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(bucket, walk, head, hlist) { - orig_node = 
bucket->data; - + hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { spin_lock_bh(&orig_node->ogm_cnt_lock); ret = orig_node_del_if(orig_node, max_if_num, batman_if->if_num); diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 84d96e2..b4b9a09 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -22,6 +22,8 @@ #ifndef _NET_BATMAN_ADV_ORIGINATOR_H_ #define _NET_BATMAN_ADV_ORIGINATOR_H_ +#include "hash.h" + int originator_init(struct bat_priv *bat_priv); void originator_free(struct bat_priv *bat_priv); void purge_orig_ref(struct bat_priv *bat_priv); @@ -38,8 +40,10 @@ int orig_hash_del_if(struct batman_if *batman_if, int max_if_num); /* returns 1 if they are the same originator */ -static inline int compare_orig(void *data1, void *data2) +static inline int compare_orig(struct hlist_node *node, void *data2) { + void *data1 = container_of(node, struct orig_node, hash_entry); + return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); } @@ -64,4 +68,33 @@ static inline int choose_orig(void *data, int32_t size) return hash % size; } +static inline struct orig_node *orig_hash_find(struct bat_priv *bat_priv, + void *data) +{ + struct hashtable_t *hash = bat_priv->orig_hash; + struct hlist_head *head; + struct hlist_node *node; + struct orig_node *orig_node, *orig_node_tmp = NULL; + int index; + + if (!hash) + return NULL; + + index = choose_orig(data, hash->size); + head = &hash->table[index]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + if (!compare_eth(orig_node, data)) + continue; + + orig_node_tmp = orig_node; + kref_get(&orig_node_tmp->refcount); + break; + } + rcu_read_unlock(); + + return orig_node_tmp; +} + #endif /* _NET_BATMAN_ADV_ORIGINATOR_H_ */ diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index b54bf6e..fc4c12a 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -39,9 +39,8 @@ void slide_own_bcast_window(struct batman_if 
*batman_if) { struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); struct hashtable_t *hash = bat_priv->orig_hash; - struct hlist_node *walk; + struct hlist_node *node; struct hlist_head *head; - struct element_t *bucket; struct orig_node *orig_node; unsigned long *word; int i; @@ -53,8 +52,7 @@ void slide_own_bcast_window(struct batman_if *batman_if) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(bucket, walk, head, hlist) { - orig_node = bucket->data; + hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { spin_lock_bh(&orig_node->ogm_cnt_lock); word_index = batman_if->if_num * NUM_WORDS; word = &(orig_node->bcast_own[word_index]); @@ -908,14 +906,11 @@ static int recv_my_icmp_packet(struct bat_priv *bat_priv, /* get routing information */ spin_lock_bh(&bat_priv->orig_hash_lock); rcu_read_lock(); - orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, - compare_orig, choose_orig, - icmp_packet->orig)); + orig_node = orig_hash_find(bat_priv, icmp_packet->orig); if (!orig_node) goto unlock; - kref_get(&orig_node->refcount); neigh_node = orig_node->router; if (!neigh_node) @@ -987,14 +982,11 @@ static int recv_icmp_ttl_exceeded(struct bat_priv *bat_priv, /* get routing information */ spin_lock_bh(&bat_priv->orig_hash_lock); rcu_read_lock(); - orig_node = ((struct orig_node *) - hash_find(bat_priv->orig_hash, compare_orig, choose_orig, - icmp_packet->orig)); + orig_node = orig_hash_find(bat_priv, icmp_packet->orig); if (!orig_node) goto unlock; - kref_get(&orig_node->refcount); neigh_node = orig_node->router; if (!neigh_node) @@ -1098,13 +1090,11 @@ int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if) /* get routing information */ spin_lock_bh(&bat_priv->orig_hash_lock); rcu_read_lock(); - orig_node = ((struct orig_node *) - hash_find(bat_priv->orig_hash, compare_orig, choose_orig, - icmp_packet->dst)); + orig_node = orig_hash_find(bat_priv, icmp_packet->dst); + if (!orig_node) goto unlock; - 
kref_get(&orig_node->refcount); neigh_node = orig_node->router; if (!neigh_node) @@ -1194,11 +1184,12 @@ struct neigh_node *find_router(struct bat_priv *bat_priv, if (compare_eth(router_orig->primary_addr, router_orig->orig)) { primary_orig_node = router_orig; } else { - primary_orig_node = hash_find(bat_priv->orig_hash, compare_orig, - choose_orig, - router_orig->primary_addr); + primary_orig_node = orig_hash_find(bat_priv, + router_orig->primary_addr); if (!primary_orig_node) goto return_router; + + kref_put(&primary_orig_node->refcount, orig_node_free_ref); } /* with less than 2 candidates, we can't do any @@ -1344,13 +1335,11 @@ int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, /* get routing information */ spin_lock_bh(&bat_priv->orig_hash_lock); rcu_read_lock(); - orig_node = ((struct orig_node *) - hash_find(bat_priv->orig_hash, compare_orig, choose_orig, - unicast_packet->dest)); + orig_node = orig_hash_find(bat_priv, unicast_packet->dest); + if (!orig_node) goto unlock; - kref_get(&orig_node->refcount); rcu_read_unlock(); /* find_router() increases neigh_nodes refcount if found. 
*/ @@ -1508,14 +1497,11 @@ int recv_bcast_packet(struct sk_buff *skb, struct batman_if *recv_if) spin_lock_bh(&bat_priv->orig_hash_lock); rcu_read_lock(); - orig_node = ((struct orig_node *) - hash_find(bat_priv->orig_hash, compare_orig, choose_orig, - bcast_packet->orig)); + orig_node = orig_hash_find(bat_priv, bcast_packet->orig); if (!orig_node) goto rcu_unlock; - kref_get(&orig_node->refcount); rcu_read_unlock(); spin_lock_bh(&orig_node->bcast_seqno_lock); diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 77d0ee0..cd8a583 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -30,12 +30,85 @@ static void _hna_global_del_orig(struct bat_priv *bat_priv, struct hna_global_entry *hna_global_entry, char *message); +/* returns 1 if they are the same mac addr */ +static int compare_lhna(struct hlist_node *node, void *data2) +{ + void *data1 = container_of(node, struct hna_local_entry, hash_entry); + + return (memcmp(data1, data2, ETH_ALEN) == 0 ? 1 : 0); +} + +/* returns 1 if they are the same mac addr */ +static int compare_ghna(struct hlist_node *node, void *data2) +{ + void *data1 = container_of(node, struct hna_global_entry, hash_entry); + + return (memcmp(data1, data2, ETH_ALEN) == 0 ? 
1 : 0); +} + static void hna_local_start_timer(struct bat_priv *bat_priv) { INIT_DELAYED_WORK(&bat_priv->hna_work, hna_local_purge); queue_delayed_work(bat_event_workqueue, &bat_priv->hna_work, 10 * HZ); } +static struct hna_local_entry *hna_local_hash_find(struct bat_priv *bat_priv, + void *data) +{ + struct hashtable_t *hash = bat_priv->hna_local_hash; + struct hlist_head *head; + struct hlist_node *node; + struct hna_local_entry *hna_local_entry, *hna_local_entry_tmp = NULL; + int index; + + if (!hash) + return NULL; + + index = choose_orig(data, hash->size); + head = &hash->table[index]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(hna_local_entry, node, head, hash_entry) { + if (!compare_eth(hna_local_entry, data)) + continue; + + hna_local_entry_tmp = hna_local_entry; + break; + } + rcu_read_unlock(); + + return hna_local_entry_tmp; +} + +static struct hna_global_entry *hna_global_hash_find(struct bat_priv *bat_priv, + void *data) +{ + struct hashtable_t *hash = bat_priv->hna_global_hash; + struct hlist_head *head; + struct hlist_node *node; + struct hna_global_entry *hna_global_entry; + struct hna_global_entry *hna_global_entry_tmp = NULL; + int index; + + if (!hash) + return NULL; + + index = choose_orig(data, hash->size); + head = &hash->table[index]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(hna_global_entry, node, head, hash_entry) { + if (!compare_eth(hna_global_entry, data)) + continue; + + hna_global_entry_tmp = hna_global_entry; + break; + } + rcu_read_unlock(); + + return hna_global_entry_tmp; +} + int hna_local_init(struct bat_priv *bat_priv) { if (bat_priv->hna_local_hash) @@ -60,12 +133,7 @@ void hna_local_add(struct net_device *soft_iface, uint8_t *addr) int required_bytes; spin_lock_bh(&bat_priv->hna_lhash_lock); - rcu_read_lock(); - hna_local_entry = - ((struct hna_local_entry *)hash_find(bat_priv->hna_local_hash, - compare_orig, choose_orig, - addr)); - rcu_read_unlock(); + hna_local_entry = hna_local_hash_find(bat_priv, addr); 
spin_unlock_bh(&bat_priv->hna_lhash_lock); if (hna_local_entry) { @@ -108,8 +176,8 @@ void hna_local_add(struct net_device *soft_iface, uint8_t *addr) spin_lock_bh(&bat_priv->hna_lhash_lock); - hash_add(bat_priv->hna_local_hash, compare_orig, choose_orig, - hna_local_entry); + hash_add(bat_priv->hna_local_hash, compare_lhna, choose_orig, + hna_local_entry, &hna_local_entry->hash_entry); bat_priv->num_local_hna++; atomic_set(&bat_priv->hna_local_changed, 1); @@ -118,11 +186,7 @@ void hna_local_add(struct net_device *soft_iface, uint8_t *addr) /* remove address from global hash if present */ spin_lock_bh(&bat_priv->hna_ghash_lock); - rcu_read_lock(); - hna_global_entry = ((struct hna_global_entry *) - hash_find(bat_priv->hna_global_hash, - compare_orig, choose_orig, addr)); - rcu_read_unlock(); + hna_global_entry = hna_global_hash_find(bat_priv, addr); if (hna_global_entry) _hna_global_del_orig(bat_priv, hna_global_entry, @@ -136,28 +200,27 @@ int hna_local_fill_buffer(struct bat_priv *bat_priv, { struct hashtable_t *hash = bat_priv->hna_local_hash; struct hna_local_entry *hna_local_entry; - struct element_t *bucket; - int i; - struct hlist_node *walk; + struct hlist_node *node; struct hlist_head *head; - int count = 0; + int i, count = 0; spin_lock_bh(&bat_priv->hna_lhash_lock); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(bucket, walk, head, hlist) { - + rcu_read_lock(); + hlist_for_each_entry_rcu(hna_local_entry, node, + head, hash_entry) { if (buff_len < (count + 1) * ETH_ALEN) break; - hna_local_entry = bucket->data; memcpy(buff + (count * ETH_ALEN), hna_local_entry->addr, ETH_ALEN); count++; } + rcu_read_unlock(); } /* if we did not get all new local hnas see you next time ;-) */ @@ -174,12 +237,11 @@ int hna_local_seq_print_text(struct seq_file *seq, void *offset) struct bat_priv *bat_priv = netdev_priv(net_dev); struct hashtable_t *hash = bat_priv->hna_local_hash; struct hna_local_entry *hna_local_entry; - int i; - struct 
hlist_node *walk; + struct hlist_node *node; struct hlist_head *head; - struct element_t *bucket; size_t buf_size, pos; char *buff; + int i; if (!bat_priv->primary_if) { return seq_printf(seq, "BATMAN mesh %s disabled - " @@ -198,8 +260,10 @@ int hna_local_seq_print_text(struct seq_file *seq, void *offset) for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each(walk, head) + rcu_read_lock(); + __hlist_for_each_rcu(node, head) buf_size += 21; + rcu_read_unlock(); } buff = kmalloc(buf_size, GFP_ATOMIC); @@ -207,18 +271,20 @@ int hna_local_seq_print_text(struct seq_file *seq, void *offset) spin_unlock_bh(&bat_priv->hna_lhash_lock); return -ENOMEM; } + buff[0] = '\0'; pos = 0; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(bucket, walk, head, hlist) { - hna_local_entry = bucket->data; - + rcu_read_lock(); + hlist_for_each_entry_rcu(hna_local_entry, node, + head, hash_entry) { pos += snprintf(buff + pos, 22, " * %pM\n", hna_local_entry->addr); } + rcu_read_unlock(); } spin_unlock_bh(&bat_priv->hna_lhash_lock); @@ -228,9 +294,10 @@ int hna_local_seq_print_text(struct seq_file *seq, void *offset) return 0; } -static void _hna_local_del(void *data, void *arg) +static void _hna_local_del(struct hlist_node *node, void *arg) { struct bat_priv *bat_priv = (struct bat_priv *)arg; + void *data = container_of(node, struct hna_local_entry, hash_entry); kfree(data); bat_priv->num_local_hna--; @@ -244,9 +311,9 @@ static void hna_local_del(struct bat_priv *bat_priv, bat_dbg(DBG_ROUTES, bat_priv, "Deleting local hna entry (%pM): %s\n", hna_local_entry->addr, message); - hash_remove(bat_priv->hna_local_hash, compare_orig, choose_orig, + hash_remove(bat_priv->hna_local_hash, compare_lhna, choose_orig, hna_local_entry->addr); - _hna_local_del(hna_local_entry, bat_priv); + _hna_local_del(&hna_local_entry->hash_entry, bat_priv); } void hna_local_remove(struct bat_priv *bat_priv, @@ -256,11 +323,7 @@ void hna_local_remove(struct 
bat_priv *bat_priv, spin_lock_bh(&bat_priv->hna_lhash_lock); - rcu_read_lock(); - hna_local_entry = (struct hna_local_entry *) - hash_find(bat_priv->hna_local_hash, compare_orig, choose_orig, - addr); - rcu_read_unlock(); + hna_local_entry = hna_local_hash_find(bat_priv, addr); if (hna_local_entry) hna_local_del(bat_priv, hna_local_entry, message); @@ -276,27 +339,29 @@ static void hna_local_purge(struct work_struct *work) container_of(delayed_work, struct bat_priv, hna_work); struct hashtable_t *hash = bat_priv->hna_local_hash; struct hna_local_entry *hna_local_entry; - int i; - struct hlist_node *walk, *safe; + struct hlist_node *node, *node_tmp; struct hlist_head *head; - struct element_t *bucket; unsigned long timeout; + int i; spin_lock_bh(&bat_priv->hna_lhash_lock); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) { - hna_local_entry = bucket->data; + hlist_for_each_entry_safe(hna_local_entry, node, node_tmp, + head, hash_entry) { + if (hna_local_entry->never_purge) + continue; timeout = hna_local_entry->last_seen; timeout += LOCAL_HNA_TIMEOUT * HZ; - if ((!hna_local_entry->never_purge) && - time_after(jiffies, timeout)) - hna_local_del(bat_priv, hna_local_entry, - "address timed out"); + if (time_before(jiffies, timeout)) + continue; + + hna_local_del(bat_priv, hna_local_entry, + "address timed out"); } } @@ -340,11 +405,7 @@ void hna_global_add_orig(struct bat_priv *bat_priv, spin_lock_bh(&bat_priv->hna_ghash_lock); hna_ptr = hna_buff + (hna_buff_count * ETH_ALEN); - rcu_read_lock(); - hna_global_entry = (struct hna_global_entry *) - hash_find(bat_priv->hna_global_hash, compare_orig, - choose_orig, hna_ptr); - rcu_read_unlock(); + hna_global_entry = hna_global_hash_find(bat_priv, hna_ptr); if (!hna_global_entry) { spin_unlock_bh(&bat_priv->hna_ghash_lock); @@ -364,8 +425,9 @@ void hna_global_add_orig(struct bat_priv *bat_priv, hna_global_entry->addr, orig_node->orig); 
spin_lock_bh(&bat_priv->hna_ghash_lock); - hash_add(bat_priv->hna_global_hash, compare_orig, - choose_orig, hna_global_entry); + hash_add(bat_priv->hna_global_hash, compare_ghna, + choose_orig, hna_global_entry, + &hna_global_entry->hash_entry); } @@ -376,11 +438,7 @@ void hna_global_add_orig(struct bat_priv *bat_priv, spin_lock_bh(&bat_priv->hna_lhash_lock); hna_ptr = hna_buff + (hna_buff_count * ETH_ALEN); - rcu_read_lock(); - hna_local_entry = (struct hna_local_entry *) - hash_find(bat_priv->hna_local_hash, compare_orig, - choose_orig, hna_ptr); - rcu_read_unlock(); + hna_local_entry = hna_local_hash_find(bat_priv, hna_ptr); if (hna_local_entry) hna_local_del(bat_priv, hna_local_entry, @@ -410,12 +468,11 @@ int hna_global_seq_print_text(struct seq_file *seq, void *offset) struct bat_priv *bat_priv = netdev_priv(net_dev); struct hashtable_t *hash = bat_priv->hna_global_hash; struct hna_global_entry *hna_global_entry; - int i; - struct hlist_node *walk; + struct hlist_node *node; struct hlist_head *head; - struct element_t *bucket; size_t buf_size, pos; char *buff; + int i; if (!bat_priv->primary_if) { return seq_printf(seq, "BATMAN mesh %s disabled - " @@ -433,8 +490,10 @@ int hna_global_seq_print_text(struct seq_file *seq, void *offset) for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each(walk, head) + rcu_read_lock(); + __hlist_for_each_rcu(node, head) buf_size += 43; + rcu_read_unlock(); } buff = kmalloc(buf_size, GFP_ATOMIC); @@ -448,14 +507,15 @@ int hna_global_seq_print_text(struct seq_file *seq, void *offset) for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(bucket, walk, head, hlist) { - hna_global_entry = bucket->data; - + rcu_read_lock(); + hlist_for_each_entry_rcu(hna_global_entry, node, + head, hash_entry) { pos += snprintf(buff + pos, 44, " * %pM via %pM\n", hna_global_entry->addr, hna_global_entry->orig_node->orig); } + rcu_read_unlock(); } spin_unlock_bh(&bat_priv->hna_ghash_lock); @@ -474,7 
+534,7 @@ static void _hna_global_del_orig(struct bat_priv *bat_priv, hna_global_entry->addr, hna_global_entry->orig_node->orig, message); - hash_remove(bat_priv->hna_global_hash, compare_orig, choose_orig, + hash_remove(bat_priv->hna_global_hash, compare_ghna, choose_orig, hna_global_entry->addr); kfree(hna_global_entry); } @@ -493,11 +553,7 @@ void hna_global_del_orig(struct bat_priv *bat_priv, while ((hna_buff_count + 1) * ETH_ALEN <= orig_node->hna_buff_len) { hna_ptr = orig_node->hna_buff + (hna_buff_count * ETH_ALEN); - rcu_read_lock(); - hna_global_entry = (struct hna_global_entry *) - hash_find(bat_priv->hna_global_hash, compare_orig, - choose_orig, hna_ptr); - rcu_read_unlock(); + hna_global_entry = hna_global_hash_find(bat_priv, hna_ptr); if ((hna_global_entry) && (hna_global_entry->orig_node == orig_node)) @@ -514,8 +570,10 @@ void hna_global_del_orig(struct bat_priv *bat_priv, orig_node->hna_buff = NULL; } -static void hna_global_del(void *data, void *arg) +static void hna_global_del(struct hlist_node *node, void *arg) { + void *data = container_of(node, struct hna_global_entry, hash_entry); + kfree(data); } @@ -533,11 +591,11 @@ struct orig_node *transtable_search(struct bat_priv *bat_priv, uint8_t *addr) struct hna_global_entry *hna_global_entry; spin_lock_bh(&bat_priv->hna_ghash_lock); - rcu_read_lock(); - hna_global_entry = (struct hna_global_entry *) - hash_find(bat_priv->hna_global_hash, - compare_orig, choose_orig, addr); - rcu_read_unlock(); + hna_global_entry = hna_global_hash_find(bat_priv, addr); + + if (hna_global_entry) + kref_get(&hna_global_entry->orig_node->refcount); + spin_unlock_bh(&bat_priv->hna_ghash_lock); if (!hna_global_entry) diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 3dd5e77..40365b8 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -85,6 +85,7 @@ struct orig_node { struct list_head frag_list; spinlock_t neigh_list_lock; /* protects neighbor list */ struct kref refcount; + struct 
hlist_node hash_entry; struct bat_priv *bat_priv; unsigned long last_frag_packet; spinlock_t ogm_cnt_lock; /* protects: bcast_own, bcast_own_sum, @@ -194,11 +195,13 @@ struct hna_local_entry { uint8_t addr[ETH_ALEN]; unsigned long last_seen; char never_purge; + struct hlist_node hash_entry; }; struct hna_global_entry { uint8_t addr[ETH_ALEN]; struct orig_node *orig_node; + struct hlist_node hash_entry; }; /** @@ -248,6 +251,7 @@ struct vis_info { * from. we should not reply to them. */ struct list_head send_list; struct kref refcount; + struct hlist_node hash_entry; struct bat_priv *bat_priv; /* this packet might be part of the vis send queue. */ struct sk_buff *skb_packet; diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 0603cea..2d5daac 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -178,17 +178,11 @@ int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv, (struct unicast_frag_packet *)skb->data; *new_skb = NULL; + spin_lock_bh(&bat_priv->orig_hash_lock); - rcu_read_lock(); - orig_node = ((struct orig_node *) - hash_find(bat_priv->orig_hash, compare_orig, choose_orig, - unicast_packet->orig)); - rcu_read_unlock(); - - if (!orig_node) { - pr_debug("couldn't find originator in orig_hash\n"); - goto out; - } + orig_node = orig_hash_find(bat_priv, unicast_packet->orig); + if (!orig_node) + goto unlock; orig_node->last_frag_packet = jiffies; @@ -212,9 +206,12 @@ int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv, /* if not, merge failed */ if (*new_skb) ret = NET_RX_SUCCESS; -out: - spin_unlock_bh(&bat_priv->orig_hash_lock); +unlock: + spin_unlock_bh(&bat_priv->orig_hash_lock); +out: + if (orig_node) + kref_put(&orig_node->refcount, orig_node_free_ref); return ret; } diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index fc97329..d179aca 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -67,11 +67,12 @@ static void free_info(struct kref *ref) } /* Compare two vis 
packets, used by the hashing algorithm */ -static int vis_info_cmp(void *data1, void *data2) +static int vis_info_cmp(struct hlist_node *node, void *data2) { struct vis_info *d1, *d2; struct vis_packet *p1, *p2; - d1 = data1; + + d1 = container_of(node, struct vis_info, hash_entry); d2 = data2; p1 = (struct vis_packet *)d1->skb_packet->data; p2 = (struct vis_packet *)d2->skb_packet->data; @@ -103,6 +104,34 @@ static int vis_info_choose(void *data, int size) return hash % size; } +static struct vis_info *vis_hash_find(struct bat_priv *bat_priv, + void *data) +{ + struct hashtable_t *hash = bat_priv->vis_hash; + struct hlist_head *head; + struct hlist_node *node; + struct vis_info *vis_info, *vis_info_tmp = NULL; + int index; + + if (!hash) + return NULL; + + index = vis_info_choose(data, hash->size); + head = &hash->table[index]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(vis_info, node, head, hash_entry) { + if (!vis_info_cmp(node, data)) + continue; + + vis_info_tmp = vis_info; + break; + } + rcu_read_unlock(); + + return vis_info_tmp; +} + /* insert interface to the list of interfaces of one originator, if it * does not already exist in the list */ static void vis_data_insert_interface(const uint8_t *interface, @@ -174,9 +203,8 @@ static ssize_t vis_data_read_entry(char *buff, struct vis_info_entry *entry, int vis_seq_print_text(struct seq_file *seq, void *offset) { - struct hlist_node *walk; + struct hlist_node *node; struct hlist_head *head; - struct element_t *bucket; struct vis_info *info; struct vis_packet *packet; struct vis_info_entry *entries; @@ -202,8 +230,8 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(bucket, walk, head, hlist) { - info = bucket->data; + rcu_read_lock(); + hlist_for_each_entry_rcu(info, node, head, hash_entry) { packet = (struct vis_packet *)info->skb_packet->data; entries = (struct vis_info_entry *) ((char *)packet + sizeof(struct 
vis_packet)); @@ -235,6 +263,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) kfree(entry); } } + rcu_read_unlock(); } buff = kmalloc(buf_size, GFP_ATOMIC); @@ -248,8 +277,8 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(bucket, walk, head, hlist) { - info = bucket->data; + rcu_read_lock(); + hlist_for_each_entry_rcu(info, node, head, hash_entry) { packet = (struct vis_packet *)info->skb_packet->data; entries = (struct vis_info_entry *) ((char *)packet + sizeof(struct vis_packet)); @@ -290,6 +319,7 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) kfree(entry); } } + rcu_read_unlock(); } spin_unlock_bh(&bat_priv->vis_hash_lock); @@ -380,10 +410,7 @@ static struct vis_info *add_packet(struct bat_priv *bat_priv, sizeof(struct vis_packet)); memcpy(search_packet->vis_orig, vis_packet->vis_orig, ETH_ALEN); - rcu_read_lock(); - old_info = hash_find(bat_priv->vis_hash, vis_info_cmp, vis_info_choose, - &search_elem); - rcu_read_unlock(); + old_info = vis_hash_find(bat_priv, &search_elem); kfree_skb(search_elem.skb_packet); if (old_info) { @@ -443,7 +470,7 @@ static struct vis_info *add_packet(struct bat_priv *bat_priv, /* try to add it */ hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose, - info); + info, &info->hash_entry); if (hash_added < 0) { /* did not work (for some reason) */ kref_put(&old_info->refcount, free_info); @@ -530,9 +557,8 @@ static int find_best_vis_server(struct bat_priv *bat_priv, struct vis_info *info) { struct hashtable_t *hash = bat_priv->orig_hash; - struct hlist_node *walk; + struct hlist_node *node; struct hlist_head *head; - struct element_t *bucket; struct orig_node *orig_node; struct vis_packet *packet; int best_tq = -1, i; @@ -543,11 +569,10 @@ static int find_best_vis_server(struct bat_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(bucket, walk, head, hlist) { - 
orig_node = bucket->data; + hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { if ((orig_node) && (orig_node->router) && - (orig_node->flags & VIS_SERVER) && - (orig_node->router->tq_avg > best_tq)) { + (orig_node->flags & VIS_SERVER) && + (orig_node->router->tq_avg > best_tq)) { best_tq = orig_node->router->tq_avg; memcpy(packet->target_orig, orig_node->orig, ETH_ALEN); @@ -576,9 +601,8 @@ static bool vis_packet_full(struct vis_info *info) static int generate_vis_packet(struct bat_priv *bat_priv) { struct hashtable_t *hash = bat_priv->orig_hash; - struct hlist_node *walk; + struct hlist_node *node; struct hlist_head *head; - struct element_t *bucket; struct orig_node *orig_node; struct neigh_node *neigh_node; struct vis_info *info = (struct vis_info *)bat_priv->my_vis_info; @@ -610,8 +634,7 @@ static int generate_vis_packet(struct bat_priv *bat_priv) head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(bucket, walk, head, hlist) { - orig_node = bucket->data; + hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { neigh_node = orig_node->router; if (!neigh_node) @@ -653,8 +676,7 @@ static int generate_vis_packet(struct bat_priv *bat_priv) for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry(bucket, walk, head, hlist) { - hna_local_entry = bucket->data; + hlist_for_each_entry(hna_local_entry, node, head, hash_entry) { entry = (struct vis_info_entry *) skb_put(info->skb_packet, sizeof(*entry)); @@ -680,25 +702,22 @@ static void purge_vis_packets(struct bat_priv *bat_priv) { int i; struct hashtable_t *hash = bat_priv->vis_hash; - struct hlist_node *walk, *safe; + struct hlist_node *node, *node_tmp; struct hlist_head *head; - struct element_t *bucket; struct vis_info *info; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; - hlist_for_each_entry_safe(bucket, walk, safe, head, hlist) { - info = bucket->data; - + hlist_for_each_entry_safe(info, node, node_tmp, + head, hash_entry) { /* never purge own 
data. */ if (info == bat_priv->my_vis_info) continue; if (time_after(jiffies, info->first_seen + VIS_TIMEOUT * HZ)) { - hlist_del(walk); - kfree(bucket); + hlist_del(node); send_list_del(info); kref_put(&info->refcount, free_info); } @@ -710,9 +729,8 @@ static void broadcast_vis_packet(struct bat_priv *bat_priv, struct vis_info *info) { struct hashtable_t *hash = bat_priv->orig_hash; - struct hlist_node *walk; + struct hlist_node *node; struct hlist_head *head; - struct element_t *bucket; struct orig_node *orig_node; struct vis_packet *packet; struct sk_buff *skb; @@ -729,9 +747,7 @@ static void broadcast_vis_packet(struct bat_priv *bat_priv, head = &hash->table[i]; rcu_read_lock(); - hlist_for_each_entry_rcu(bucket, walk, head, hlist) { - orig_node = bucket->data; - + hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { /* if it's a vis server and reachable, send it. */ if ((!orig_node) || (!orig_node->router)) continue; @@ -774,14 +790,11 @@ static void unicast_vis_packet(struct bat_priv *bat_priv, spin_lock_bh(&bat_priv->orig_hash_lock); rcu_read_lock(); - orig_node = ((struct orig_node *)hash_find(bat_priv->orig_hash, - compare_orig, choose_orig, - packet->target_orig)); + orig_node = orig_hash_find(bat_priv, packet->target_orig); if (!orig_node) goto unlock; - kref_get(&orig_node->refcount); neigh_node = orig_node->router; if (!neigh_node) @@ -925,7 +938,8 @@ int vis_init(struct bat_priv *bat_priv) INIT_LIST_HEAD(&bat_priv->vis_send_list); hash_added = hash_add(bat_priv->vis_hash, vis_info_cmp, vis_info_choose, - bat_priv->my_vis_info); + bat_priv->my_vis_info, + &bat_priv->my_vis_info->hash_entry); if (hash_added < 0) { pr_err("Can't add own vis packet into hash\n"); /* not in hash, need to remove it manually. 
*/ @@ -947,10 +961,11 @@ err: } /* Decrease the reference count on a hash item info */ -static void free_info_ref(void *data, void *arg) +static void free_info_ref(struct hlist_node *node, void *arg) { - struct vis_info *info = data; + struct vis_info *info; + info = container_of(node, struct vis_info, hash_entry); send_list_del(info); kref_put(&info->refcount, free_info); } -- cgit v1.1 From 7b36e8eef989fc59535b4f1d3fc0f83afaf419d4 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Fri, 18 Feb 2011 12:28:10 +0000 Subject: batman-adv: Correct rcu refcounting for orig_node It might be possible that 2 threads access the same data in the same rcu grace period. The first thread calls call_rcu() to decrement the refcount and free the data while the second thread increases the refcount to use the data. To avoid this race condition all refcount operations have to be atomic. Reported-by: Sven Eckelmann Signed-off-by: Marek Lindner --- net/batman-adv/gateway_client.c | 6 ++++-- net/batman-adv/icmp_socket.c | 2 +- net/batman-adv/originator.c | 21 +++++++++++++-------- net/batman-adv/originator.h | 6 ++++-- net/batman-adv/routing.c | 24 ++++++++++++------------ net/batman-adv/translation-table.c | 15 +++++++++------ net/batman-adv/types.h | 3 ++- net/batman-adv/unicast.c | 6 +++--- net/batman-adv/vis.c | 2 +- 9 files changed, 49 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 41eba8a..3cc4355 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -53,9 +53,11 @@ void *gw_get_selected(struct bat_priv *bat_priv) goto out; orig_node = curr_gateway_tmp->orig_node; + if (!orig_node) + goto out; - if (orig_node) - kref_get(&orig_node->refcount); + if (!atomic_inc_not_zero(&orig_node->refcount)) + orig_node = NULL; out: rcu_read_unlock(); diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 139b733..a0a35b1 100644 --- 
a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -271,7 +271,7 @@ out: if (neigh_node) neigh_node_free_ref(neigh_node); if (orig_node) - kref_put(&orig_node->refcount, orig_node_free_ref); + orig_node_free_ref(orig_node); return len; } diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index bdcb399..a70debe 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -102,13 +102,13 @@ struct neigh_node *create_neighbor(struct orig_node *orig_node, return neigh_node; } -void orig_node_free_ref(struct kref *refcount) +static void orig_node_free_rcu(struct rcu_head *rcu) { struct hlist_node *node, *node_tmp; struct neigh_node *neigh_node, *tmp_neigh_node; struct orig_node *orig_node; - orig_node = container_of(refcount, struct orig_node, refcount); + orig_node = container_of(rcu, struct orig_node, rcu); spin_lock_bh(&orig_node->neigh_list_lock); @@ -137,6 +137,12 @@ void orig_node_free_ref(struct kref *refcount) kfree(orig_node); } +void orig_node_free_ref(struct orig_node *orig_node) +{ + if (atomic_dec_and_test(&orig_node->refcount)) + call_rcu(&orig_node->rcu, orig_node_free_rcu); +} + void originator_free(struct bat_priv *bat_priv) { struct hashtable_t *hash = bat_priv->orig_hash; @@ -163,7 +169,7 @@ void originator_free(struct bat_priv *bat_priv) head, hash_entry) { hlist_del_rcu(node); - kref_put(&orig_node->refcount, orig_node_free_ref); + orig_node_free_ref(orig_node); } spin_unlock_bh(list_lock); } @@ -196,7 +202,9 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr) spin_lock_init(&orig_node->ogm_cnt_lock); spin_lock_init(&orig_node->bcast_seqno_lock); spin_lock_init(&orig_node->neigh_list_lock); - kref_init(&orig_node->refcount); + + /* extra reference for return */ + atomic_set(&orig_node->refcount, 2); orig_node->bat_priv = bat_priv; memcpy(orig_node->orig, addr, ETH_ALEN); @@ -229,8 +237,6 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr) if 
(hash_added < 0) goto free_bcast_own_sum; - /* extra reference for return */ - kref_get(&orig_node->refcount); return orig_node; free_bcast_own_sum: kfree(orig_node->bcast_own_sum); @@ -348,8 +354,7 @@ static void _purge_orig(struct bat_priv *bat_priv) if (orig_node->gw_flags) gw_node_delete(bat_priv, orig_node); hlist_del_rcu(node); - kref_put(&orig_node->refcount, - orig_node_free_ref); + orig_node_free_ref(orig_node); continue; } diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index b4b9a09..3d7a39d 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -27,7 +27,7 @@ int originator_init(struct bat_priv *bat_priv); void originator_free(struct bat_priv *bat_priv); void purge_orig_ref(struct bat_priv *bat_priv); -void orig_node_free_ref(struct kref *refcount); +void orig_node_free_ref(struct orig_node *orig_node); struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr); struct neigh_node *create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node, @@ -88,8 +88,10 @@ static inline struct orig_node *orig_hash_find(struct bat_priv *bat_priv, if (!compare_eth(orig_node, data)) continue; + if (!atomic_inc_not_zero(&orig_node->refcount)) + continue; + orig_node_tmp = orig_node; - kref_get(&orig_node_tmp->refcount); break; } rcu_read_unlock(); diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index fc4c12a..9863c03 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -420,7 +420,7 @@ static void update_orig(struct bat_priv *bat_priv, neigh_node = create_neighbor(orig_node, orig_tmp, ethhdr->h_source, if_incoming); - kref_put(&orig_tmp->refcount, orig_node_free_ref); + orig_node_free_ref(orig_tmp); if (!neigh_node) goto unlock; @@ -604,7 +604,7 @@ static char count_real_packets(struct ethhdr *ethhdr, out: spin_unlock_bh(&orig_node->ogm_cnt_lock); - kref_put(&orig_node->refcount, orig_node_free_ref); + orig_node_free_ref(orig_node); return ret; } @@ -730,7 
+730,7 @@ void receive_bat_packet(struct ethhdr *ethhdr, bat_dbg(DBG_BATMAN, bat_priv, "Drop packet: " "originator packet from myself (via neighbor)\n"); - kref_put(&orig_neigh_node->refcount, orig_node_free_ref); + orig_node_free_ref(orig_neigh_node); return; } @@ -835,10 +835,10 @@ void receive_bat_packet(struct ethhdr *ethhdr, 0, hna_buff_len, if_incoming); out_neigh: - if (!is_single_hop_neigh) - kref_put(&orig_neigh_node->refcount, orig_node_free_ref); + if ((orig_neigh_node) && (!is_single_hop_neigh)) + orig_node_free_ref(orig_neigh_node); out: - kref_put(&orig_node->refcount, orig_node_free_ref); + orig_node_free_ref(orig_node); } int recv_bat_packet(struct sk_buff *skb, struct batman_if *batman_if) @@ -952,7 +952,7 @@ out: if (neigh_node) neigh_node_free_ref(neigh_node); if (orig_node) - kref_put(&orig_node->refcount, orig_node_free_ref); + orig_node_free_ref(orig_node); return ret; } @@ -1028,7 +1028,7 @@ out: if (neigh_node) neigh_node_free_ref(neigh_node); if (orig_node) - kref_put(&orig_node->refcount, orig_node_free_ref); + orig_node_free_ref(orig_node); return ret; } @@ -1134,7 +1134,7 @@ out: if (neigh_node) neigh_node_free_ref(neigh_node); if (orig_node) - kref_put(&orig_node->refcount, orig_node_free_ref); + orig_node_free_ref(orig_node); return ret; } @@ -1189,7 +1189,7 @@ struct neigh_node *find_router(struct bat_priv *bat_priv, if (!primary_orig_node) goto return_router; - kref_put(&primary_orig_node->refcount, orig_node_free_ref); + orig_node_free_ref(primary_orig_node); } /* with less than 2 candidates, we can't do any @@ -1401,7 +1401,7 @@ out: if (neigh_node) neigh_node_free_ref(neigh_node); if (orig_node) - kref_put(&orig_node->refcount, orig_node_free_ref); + orig_node_free_ref(orig_node); return ret; } @@ -1543,7 +1543,7 @@ spin_unlock: spin_unlock_bh(&bat_priv->orig_hash_lock); out: if (orig_node) - kref_put(&orig_node->refcount, orig_node_free_ref); + orig_node_free_ref(orig_node); return ret; } diff --git 
a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index cd8a583..8d15b48 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -589,17 +589,20 @@ void hna_global_free(struct bat_priv *bat_priv) struct orig_node *transtable_search(struct bat_priv *bat_priv, uint8_t *addr) { struct hna_global_entry *hna_global_entry; + struct orig_node *orig_node = NULL; spin_lock_bh(&bat_priv->hna_ghash_lock); hna_global_entry = hna_global_hash_find(bat_priv, addr); - if (hna_global_entry) - kref_get(&hna_global_entry->orig_node->refcount); + if (!hna_global_entry) + goto out; - spin_unlock_bh(&bat_priv->hna_ghash_lock); + if (!atomic_inc_not_zero(&hna_global_entry->orig_node->refcount)) + goto out; - if (!hna_global_entry) - return NULL; + orig_node = hna_global_entry->orig_node; - return hna_global_entry->orig_node; +out: + spin_unlock_bh(&bat_priv->hna_ghash_lock); + return orig_node; } diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 40365b8..1be76fe 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -84,7 +84,8 @@ struct orig_node { struct hlist_head neigh_list; struct list_head frag_list; spinlock_t neigh_list_lock; /* protects neighbor list */ - struct kref refcount; + atomic_t refcount; + struct rcu_head rcu; struct hlist_node hash_entry; struct bat_priv *bat_priv; unsigned long last_frag_packet; diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 2d5daac..2ab8198 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -211,7 +211,7 @@ unlock: spin_unlock_bh(&bat_priv->orig_hash_lock); out: if (orig_node) - kref_put(&orig_node->refcount, orig_node_free_ref); + orig_node_free_ref(orig_node); return ret; } @@ -280,7 +280,7 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv) { struct ethhdr *ethhdr = (struct ethhdr *)skb->data; struct unicast_packet *unicast_packet; - struct orig_node *orig_node = NULL; + struct orig_node 
*orig_node; struct batman_if *batman_if; struct neigh_node *neigh_node; int data_len = skb->len; @@ -347,7 +347,7 @@ out: if (neigh_node) neigh_node_free_ref(neigh_node); if (orig_node) - kref_put(&orig_node->refcount, orig_node_free_ref); + orig_node_free_ref(orig_node); if (ret == 1) kfree_skb(skb); return ret; diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index d179aca..8972242 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -826,7 +826,7 @@ out: if (neigh_node) neigh_node_free_ref(neigh_node); if (orig_node) - kref_put(&orig_node->refcount, orig_node_free_ref); + orig_node_free_ref(orig_node); return; } -- cgit v1.1 From 1605d0d60b66b9461cfcff86f8cfc80964f23430 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Fri, 18 Feb 2011 12:28:11 +0000 Subject: batman-adv: increase refcount in create_neighbor to be consistent Signed-off-by: Marek Lindner --- net/batman-adv/originator.c | 4 ++- net/batman-adv/routing.c | 63 +++++++++++++++++++-------------------------- 2 files changed, 30 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index a70debe..69e27a24 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -94,7 +94,9 @@ struct neigh_node *create_neighbor(struct orig_node *orig_node, memcpy(neigh_node->addr, neigh, ETH_ALEN); neigh_node->orig_node = orig_neigh_node; neigh_node->if_incoming = if_incoming; - atomic_set(&neigh_node->refcount, 1); + + /* extra reference for return */ + atomic_set(&neigh_node->refcount, 2); spin_lock_bh(&orig_node->neigh_list_lock); hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 9863c03..c4b7ae9 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -150,7 +150,7 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, struct batman_if *if_incoming) { struct bat_priv *bat_priv = 
netdev_priv(if_incoming->soft_iface); - struct neigh_node *neigh_node = NULL, *tmp_neigh_node = NULL; + struct neigh_node *neigh_node = NULL, *tmp_neigh_node; struct hlist_node *node; unsigned char total_count; uint8_t orig_eq_count, neigh_rq_count, tq_own; @@ -161,27 +161,27 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, hlist_for_each_entry_rcu(tmp_neigh_node, node, &orig_node->neigh_list, list) { - if (compare_eth(tmp_neigh_node->addr, - orig_neigh_node->orig) && - (tmp_neigh_node->if_incoming == if_incoming)) - neigh_node = tmp_neigh_node; + if (!compare_eth(tmp_neigh_node->addr, + orig_neigh_node->orig)) + continue; + + if (tmp_neigh_node->if_incoming != if_incoming) + continue; + + if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) + continue; + + neigh_node = tmp_neigh_node; } + rcu_read_unlock(); if (!neigh_node) neigh_node = create_neighbor(orig_node, orig_neigh_node, orig_neigh_node->orig, if_incoming); - /* create_neighbor failed, return 0 */ if (!neigh_node) - goto unlock; - - if (!atomic_inc_not_zero(&neigh_node->refcount)) { - neigh_node = NULL; - goto unlock; - } - - rcu_read_unlock(); + goto out; neigh_node->last_valid = jiffies; } else { @@ -190,27 +190,27 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, hlist_for_each_entry_rcu(tmp_neigh_node, node, &orig_neigh_node->neigh_list, list) { - if (compare_eth(tmp_neigh_node->addr, - orig_neigh_node->orig) && - (tmp_neigh_node->if_incoming == if_incoming)) - neigh_node = tmp_neigh_node; + if (!compare_eth(tmp_neigh_node->addr, + orig_neigh_node->orig)) + continue; + + if (tmp_neigh_node->if_incoming != if_incoming) + continue; + + if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) + continue; + + neigh_node = tmp_neigh_node; } + rcu_read_unlock(); if (!neigh_node) neigh_node = create_neighbor(orig_neigh_node, orig_neigh_node, orig_neigh_node->orig, if_incoming); - /* create_neighbor failed, return 0 */ if (!neigh_node) - goto unlock; - - if 
(!atomic_inc_not_zero(&neigh_node->refcount)) { - neigh_node = NULL; - goto unlock; - } - - rcu_read_unlock(); + goto out; } orig_node->last_valid = jiffies; @@ -265,10 +265,6 @@ static int is_bidirectional_neigh(struct orig_node *orig_node, if (batman_packet->tq >= TQ_TOTAL_BIDRECT_LIMIT) ret = 1; - goto out; - -unlock: - rcu_read_unlock(); out: if (neigh_node) neigh_node_free_ref(neigh_node); @@ -423,11 +419,6 @@ static void update_orig(struct bat_priv *bat_priv, orig_node_free_ref(orig_tmp); if (!neigh_node) goto unlock; - - if (!atomic_inc_not_zero(&neigh_node->refcount)) { - neigh_node = NULL; - goto unlock; - } } else bat_dbg(DBG_BATMAN, bat_priv, "Updating existing last-hop neighbor of originator\n"); -- cgit v1.1 From d0072609baebaffb522083d367f4f195187f60f8 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Wed, 19 Jan 2011 20:01:44 +0000 Subject: batman-adv: remove orig_hash spinlock Signed-off-by: Marek Lindner --- net/batman-adv/icmp_socket.c | 16 +++------- net/batman-adv/main.c | 1 - net/batman-adv/originator.c | 21 ------------- net/batman-adv/routing.c | 75 +++++++------------------------------------- net/batman-adv/types.h | 1 - net/batman-adv/unicast.c | 36 ++++++++------------- net/batman-adv/vis.c | 36 ++++++--------------- 7 files changed, 38 insertions(+), 148 deletions(-) (limited to 'net') diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index a0a35b1..34ce56c 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -158,9 +158,7 @@ static ssize_t bat_socket_write(struct file *file, const char __user *buff, struct orig_node *orig_node = NULL; struct neigh_node *neigh_node = NULL; - struct batman_if *batman_if; size_t packet_len = sizeof(struct icmp_packet); - uint8_t dstaddr[ETH_ALEN]; if (len < sizeof(struct icmp_packet)) { bat_dbg(DBG_BATMAN, bat_priv, @@ -220,7 +218,6 @@ static ssize_t bat_socket_write(struct file *file, const char __user *buff, if (atomic_read(&bat_priv->mesh_state) != 
MESH_ACTIVE) goto dst_unreach; - spin_lock_bh(&bat_priv->orig_hash_lock); rcu_read_lock(); orig_node = orig_hash_find(bat_priv, icmp_packet->dst); @@ -239,14 +236,10 @@ static ssize_t bat_socket_write(struct file *file, const char __user *buff, rcu_read_unlock(); - batman_if = orig_node->router->if_incoming; - memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); - spin_unlock_bh(&bat_priv->orig_hash_lock); - - if (!batman_if) + if (!neigh_node->if_incoming) goto dst_unreach; - if (batman_if->if_status != IF_ACTIVE) + if (neigh_node->if_incoming->if_status != IF_ACTIVE) goto dst_unreach; memcpy(icmp_packet->orig, @@ -254,14 +247,13 @@ static ssize_t bat_socket_write(struct file *file, const char __user *buff, if (packet_len == sizeof(struct icmp_packet_rr)) memcpy(icmp_packet->rr, - batman_if->net_dev->dev_addr, ETH_ALEN); + neigh_node->if_incoming->net_dev->dev_addr, ETH_ALEN); - send_skb_packet(skb, batman_if, dstaddr); + send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); goto out; unlock: rcu_read_unlock(); - spin_unlock_bh(&bat_priv->orig_hash_lock); dst_unreach: icmp_packet->msg_type = DESTINATION_UNREACHABLE; bat_socket_add_packet(socket_client, icmp_packet, packet_len); diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 3f977ea..09c21f2 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -79,7 +79,6 @@ int mesh_init(struct net_device *soft_iface) { struct bat_priv *bat_priv = netdev_priv(soft_iface); - spin_lock_init(&bat_priv->orig_hash_lock); spin_lock_init(&bat_priv->forw_bat_list_lock); spin_lock_init(&bat_priv->forw_bcast_list_lock); spin_lock_init(&bat_priv->hna_lhash_lock); diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 69e27a24..a8d0262 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -44,18 +44,15 @@ int originator_init(struct bat_priv *bat_priv) if (bat_priv->orig_hash) return 1; - spin_lock_bh(&bat_priv->orig_hash_lock); bat_priv->orig_hash = 
hash_new(1024); if (!bat_priv->orig_hash) goto err; - spin_unlock_bh(&bat_priv->orig_hash_lock); start_purge_timer(bat_priv); return 1; err: - spin_unlock_bh(&bat_priv->orig_hash_lock); return 0; } @@ -159,7 +156,6 @@ void originator_free(struct bat_priv *bat_priv) cancel_delayed_work_sync(&bat_priv->orig_work); - spin_lock_bh(&bat_priv->orig_hash_lock); bat_priv->orig_hash = NULL; for (i = 0; i < hash->size; i++) { @@ -177,7 +173,6 @@ void originator_free(struct bat_priv *bat_priv) } hash_destroy(hash); - spin_unlock_bh(&bat_priv->orig_hash_lock); } /* this function finds or creates an originator entry for the given @@ -342,8 +337,6 @@ static void _purge_orig(struct bat_priv *bat_priv) if (!hash) return; - spin_lock_bh(&bat_priv->orig_hash_lock); - /* for all origins... */ for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -367,8 +360,6 @@ static void _purge_orig(struct bat_priv *bat_priv) spin_unlock_bh(list_lock); } - spin_unlock_bh(&bat_priv->orig_hash_lock); - gw_node_purge(bat_priv); gw_election(bat_priv); @@ -425,8 +416,6 @@ int orig_seq_print_text(struct seq_file *seq, void *offset) "Originator", "last-seen", "#", TQ_MAX_VALUE, "Nexthop", "outgoingIF", "Potential nexthops"); - spin_lock_bh(&bat_priv->orig_hash_lock); - for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -462,8 +451,6 @@ int orig_seq_print_text(struct seq_file *seq, void *offset) rcu_read_unlock(); } - spin_unlock_bh(&bat_priv->orig_hash_lock); - if ((batman_count == 0)) seq_printf(seq, "No batman nodes in range ...\n"); @@ -511,8 +498,6 @@ int orig_hash_add_if(struct batman_if *batman_if, int max_if_num) /* resize all orig nodes because orig_node->bcast_own(_sum) depend on * if_num */ - spin_lock_bh(&bat_priv->orig_hash_lock); - for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -528,12 +513,10 @@ int orig_hash_add_if(struct batman_if *batman_if, int max_if_num) rcu_read_unlock(); } - spin_unlock_bh(&bat_priv->orig_hash_lock); return 0; err: 
rcu_read_unlock(); - spin_unlock_bh(&bat_priv->orig_hash_lock); return -ENOMEM; } @@ -601,8 +584,6 @@ int orig_hash_del_if(struct batman_if *batman_if, int max_if_num) /* resize all orig nodes because orig_node->bcast_own(_sum) depend on * if_num */ - spin_lock_bh(&bat_priv->orig_hash_lock); - for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -637,11 +618,9 @@ int orig_hash_del_if(struct batman_if *batman_if, int max_if_num) rcu_read_unlock(); batman_if->if_num = -1; - spin_unlock_bh(&bat_priv->orig_hash_lock); return 0; err: rcu_read_unlock(); - spin_unlock_bh(&bat_priv->orig_hash_lock); return -ENOMEM; } diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index c4b7ae9..3cfa2c7 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -46,8 +46,6 @@ void slide_own_bcast_window(struct batman_if *batman_if) int i; size_t word_index; - spin_lock_bh(&bat_priv->orig_hash_lock); - for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -64,8 +62,6 @@ void slide_own_bcast_window(struct batman_if *batman_if) } rcu_read_unlock(); } - - spin_unlock_bh(&bat_priv->orig_hash_lock); } static void update_HNA(struct bat_priv *bat_priv, struct orig_node *orig_node, @@ -771,7 +767,7 @@ void receive_bat_packet(struct ethhdr *ethhdr, orig_node : get_orig_node(bat_priv, ethhdr->h_source)); if (!orig_neigh_node) - goto out_neigh; + goto out; /* drop packet if sender is not a direct neighbor and if we * don't route towards it */ @@ -834,7 +830,6 @@ out: int recv_bat_packet(struct sk_buff *skb, struct batman_if *batman_if) { - struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); struct ethhdr *ethhdr; /* drop packet if it has not necessary minimum size */ @@ -861,12 +856,10 @@ int recv_bat_packet(struct sk_buff *skb, struct batman_if *batman_if) ethhdr = (struct ethhdr *)skb_mac_header(skb); - spin_lock_bh(&bat_priv->orig_hash_lock); receive_aggr_bat_packet(ethhdr, skb->data, skb_headlen(skb), batman_if); - 
spin_unlock_bh(&bat_priv->orig_hash_lock); kfree_skb(skb); return NET_RX_SUCCESS; @@ -878,8 +871,6 @@ static int recv_my_icmp_packet(struct bat_priv *bat_priv, struct orig_node *orig_node = NULL; struct neigh_node *neigh_node = NULL; struct icmp_packet_rr *icmp_packet; - struct batman_if *batman_if; - uint8_t dstaddr[ETH_ALEN]; int ret = NET_RX_DROP; icmp_packet = (struct icmp_packet_rr *)skb->data; @@ -895,7 +886,6 @@ static int recv_my_icmp_packet(struct bat_priv *bat_priv, /* answer echo request (ping) */ /* get routing information */ - spin_lock_bh(&bat_priv->orig_hash_lock); rcu_read_lock(); orig_node = orig_hash_find(bat_priv, icmp_packet->orig); @@ -914,12 +904,6 @@ static int recv_my_icmp_packet(struct bat_priv *bat_priv, rcu_read_unlock(); - /* don't lock while sending the packets ... we therefore - * copy the required data before sending */ - batman_if = orig_node->router->if_incoming; - memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); - spin_unlock_bh(&bat_priv->orig_hash_lock); - /* create a copy of the skb, if needed, to modify it. 
*/ if (skb_cow(skb, sizeof(struct ethhdr)) < 0) goto out; @@ -932,13 +916,12 @@ static int recv_my_icmp_packet(struct bat_priv *bat_priv, icmp_packet->msg_type = ECHO_REPLY; icmp_packet->ttl = TTL; - send_skb_packet(skb, batman_if, dstaddr); + send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); ret = NET_RX_SUCCESS; goto out; unlock: rcu_read_unlock(); - spin_unlock_bh(&bat_priv->orig_hash_lock); out: if (neigh_node) neigh_node_free_ref(neigh_node); @@ -953,8 +936,6 @@ static int recv_icmp_ttl_exceeded(struct bat_priv *bat_priv, struct orig_node *orig_node = NULL; struct neigh_node *neigh_node = NULL; struct icmp_packet *icmp_packet; - struct batman_if *batman_if; - uint8_t dstaddr[ETH_ALEN]; int ret = NET_RX_DROP; icmp_packet = (struct icmp_packet *)skb->data; @@ -971,7 +952,6 @@ static int recv_icmp_ttl_exceeded(struct bat_priv *bat_priv, goto out; /* get routing information */ - spin_lock_bh(&bat_priv->orig_hash_lock); rcu_read_lock(); orig_node = orig_hash_find(bat_priv, icmp_packet->orig); @@ -990,12 +970,6 @@ static int recv_icmp_ttl_exceeded(struct bat_priv *bat_priv, rcu_read_unlock(); - /* don't lock while sending the packets ... we therefore - * copy the required data before sending */ - batman_if = orig_node->router->if_incoming; - memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); - spin_unlock_bh(&bat_priv->orig_hash_lock); - /* create a copy of the skb, if needed, to modify it. 
*/ if (skb_cow(skb, sizeof(struct ethhdr)) < 0) goto out; @@ -1008,13 +982,12 @@ static int recv_icmp_ttl_exceeded(struct bat_priv *bat_priv, icmp_packet->msg_type = TTL_EXCEEDED; icmp_packet->ttl = TTL; - send_skb_packet(skb, batman_if, dstaddr); + send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); ret = NET_RX_SUCCESS; goto out; unlock: rcu_read_unlock(); - spin_unlock_bh(&bat_priv->orig_hash_lock); out: if (neigh_node) neigh_node_free_ref(neigh_node); @@ -1031,9 +1004,7 @@ int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if) struct ethhdr *ethhdr; struct orig_node *orig_node = NULL; struct neigh_node *neigh_node = NULL; - struct batman_if *batman_if; int hdr_size = sizeof(struct icmp_packet); - uint8_t dstaddr[ETH_ALEN]; int ret = NET_RX_DROP; /** @@ -1079,7 +1050,6 @@ int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if) return recv_icmp_ttl_exceeded(bat_priv, skb); /* get routing information */ - spin_lock_bh(&bat_priv->orig_hash_lock); rcu_read_lock(); orig_node = orig_hash_find(bat_priv, icmp_packet->dst); @@ -1098,12 +1068,6 @@ int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if) rcu_read_unlock(); - /* don't lock while sending the packets ... we therefore - * copy the required data before sending */ - batman_if = orig_node->router->if_incoming; - memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); - spin_unlock_bh(&bat_priv->orig_hash_lock); - /* create a copy of the skb, if needed, to modify it. 
*/ if (skb_cow(skb, sizeof(struct ethhdr)) < 0) goto out; @@ -1114,13 +1078,12 @@ int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if) icmp_packet->ttl--; /* route it */ - send_skb_packet(skb, batman_if, dstaddr); + send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); ret = NET_RX_SUCCESS; goto out; unlock: rcu_read_unlock(); - spin_unlock_bh(&bat_priv->orig_hash_lock); out: if (neigh_node) neigh_node_free_ref(neigh_node); @@ -1306,8 +1269,6 @@ int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); struct orig_node *orig_node = NULL; struct neigh_node *neigh_node = NULL; - struct batman_if *batman_if; - uint8_t dstaddr[ETH_ALEN]; struct unicast_packet *unicast_packet; struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); int ret = NET_RX_DROP; @@ -1324,7 +1285,6 @@ int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, } /* get routing information */ - spin_lock_bh(&bat_priv->orig_hash_lock); rcu_read_lock(); orig_node = orig_hash_find(bat_priv, unicast_packet->dest); @@ -1336,16 +1296,8 @@ int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, /* find_router() increases neigh_nodes refcount if found. */ neigh_node = find_router(bat_priv, orig_node, recv_if); - if (!neigh_node) { - spin_unlock_bh(&bat_priv->orig_hash_lock); + if (!neigh_node) goto out; - } - - /* don't lock while sending the packets ... we therefore - * copy the required data before sending */ - batman_if = neigh_node->if_incoming; - memcpy(dstaddr, neigh_node->addr, ETH_ALEN); - spin_unlock_bh(&bat_priv->orig_hash_lock); /* create a copy of the skb, if needed, to modify it. 
*/ if (skb_cow(skb, sizeof(struct ethhdr)) < 0) @@ -1355,12 +1307,14 @@ int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, if (unicast_packet->packet_type == BAT_UNICAST && atomic_read(&bat_priv->fragmentation) && - skb->len > batman_if->net_dev->mtu) - return frag_send_skb(skb, bat_priv, batman_if, - dstaddr); + skb->len > neigh_node->if_incoming->net_dev->mtu) { + ret = frag_send_skb(skb, bat_priv, + neigh_node->if_incoming, neigh_node->addr); + goto out; + } if (unicast_packet->packet_type == BAT_UNICAST_FRAG && - frag_can_reassemble(skb, batman_if->net_dev->mtu)) { + frag_can_reassemble(skb, neigh_node->if_incoming->net_dev->mtu)) { ret = frag_reassemble_skb(skb, bat_priv, &new_skb); @@ -1381,13 +1335,12 @@ int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, unicast_packet->ttl--; /* route it */ - send_skb_packet(skb, batman_if, dstaddr); + send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); ret = NET_RX_SUCCESS; goto out; unlock: rcu_read_unlock(); - spin_unlock_bh(&bat_priv->orig_hash_lock); out: if (neigh_node) neigh_node_free_ref(neigh_node); @@ -1486,7 +1439,6 @@ int recv_bcast_packet(struct sk_buff *skb, struct batman_if *recv_if) if (bcast_packet->ttl < 2) goto out; - spin_lock_bh(&bat_priv->orig_hash_lock); rcu_read_lock(); orig_node = orig_hash_find(bat_priv, bcast_packet->orig); @@ -1515,7 +1467,6 @@ int recv_bcast_packet(struct sk_buff *skb, struct batman_if *recv_if) orig_node->last_bcast_seqno = ntohl(bcast_packet->seqno); spin_unlock_bh(&orig_node->bcast_seqno_lock); - spin_unlock_bh(&bat_priv->orig_hash_lock); /* rebroadcast packet */ add_bcast_packet_to_list(bat_priv, skb); @@ -1527,11 +1478,9 @@ int recv_bcast_packet(struct sk_buff *skb, struct batman_if *recv_if) rcu_unlock: rcu_read_unlock(); - spin_unlock_bh(&bat_priv->orig_hash_lock); goto out; spin_unlock: spin_unlock_bh(&orig_node->bcast_seqno_lock); - spin_unlock_bh(&bat_priv->orig_hash_lock); out: if (orig_node) 
orig_node_free_ref(orig_node); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 1be76fe..a9bf186 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -159,7 +159,6 @@ struct bat_priv { struct hashtable_t *hna_local_hash; struct hashtable_t *hna_global_hash; struct hashtable_t *vis_hash; - spinlock_t orig_hash_lock; /* protects orig_hash */ spinlock_t forw_bat_list_lock; /* protects forw_bat_list */ spinlock_t forw_bcast_list_lock; /* protects */ spinlock_t hna_lhash_lock; /* protects hna_local_hash */ diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 2ab8198..b411438 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -179,10 +179,9 @@ int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv, *new_skb = NULL; - spin_lock_bh(&bat_priv->orig_hash_lock); orig_node = orig_hash_find(bat_priv, unicast_packet->orig); if (!orig_node) - goto unlock; + goto out; orig_node->last_frag_packet = jiffies; @@ -207,8 +206,6 @@ int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv, if (*new_skb) ret = NET_RX_SUCCESS; -unlock: - spin_unlock_bh(&bat_priv->orig_hash_lock); out: if (orig_node) orig_node_free_ref(orig_node); @@ -281,14 +278,10 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv) struct ethhdr *ethhdr = (struct ethhdr *)skb->data; struct unicast_packet *unicast_packet; struct orig_node *orig_node; - struct batman_if *batman_if; struct neigh_node *neigh_node; int data_len = skb->len; - uint8_t dstaddr[6]; int ret = 1; - spin_lock_bh(&bat_priv->orig_hash_lock); - /* get routing information */ if (is_multicast_ether_addr(ethhdr->h_dest)) { orig_node = (struct orig_node *)gw_get_selected(bat_priv); @@ -300,23 +293,21 @@ int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv) orig_node = transtable_search(bat_priv, ethhdr->h_dest); find_router: - /* find_router() increases neigh_nodes refcount if found. 
*/ + /** + * find_router(): + * - if orig_node is NULL it returns NULL + * - increases neigh_nodes refcount if found. + */ neigh_node = find_router(bat_priv, orig_node, NULL); if (!neigh_node) - goto unlock; + goto out; if (neigh_node->if_incoming->if_status != IF_ACTIVE) - goto unlock; + goto out; if (my_skb_head_push(skb, sizeof(struct unicast_packet)) < 0) - goto unlock; - - /* don't lock while sending the packets ... we therefore - * copy the required data before sending */ - batman_if = neigh_node->if_incoming; - memcpy(dstaddr, neigh_node->addr, ETH_ALEN); - spin_unlock_bh(&bat_priv->orig_hash_lock); + goto out; unicast_packet = (struct unicast_packet *)skb->data; @@ -330,19 +321,18 @@ find_router: if (atomic_read(&bat_priv->fragmentation) && data_len + sizeof(struct unicast_packet) > - batman_if->net_dev->mtu) { + neigh_node->if_incoming->net_dev->mtu) { /* send frag skb decreases ttl */ unicast_packet->ttl++; - ret = frag_send_skb(skb, bat_priv, batman_if, dstaddr); + ret = frag_send_skb(skb, bat_priv, + neigh_node->if_incoming, neigh_node->addr); goto out; } - send_skb_packet(skb, batman_if, dstaddr); + send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); ret = 0; goto out; -unlock: - spin_unlock_bh(&bat_priv->orig_hash_lock); out: if (neigh_node) neigh_node_free_ref(neigh_node); diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 8972242..e8911cb 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -614,7 +614,6 @@ static int generate_vis_packet(struct bat_priv *bat_priv) info->first_seen = jiffies; packet->vis_type = atomic_read(&bat_priv->vis_mode); - spin_lock_bh(&bat_priv->orig_hash_lock); memcpy(packet->target_orig, broadcast_addr, ETH_ALEN); packet->ttl = TTL; packet->seqno = htonl(ntohl(packet->seqno) + 1); @@ -624,10 +623,8 @@ static int generate_vis_packet(struct bat_priv *bat_priv) if (packet->vis_type == VIS_TYPE_CLIENT_UPDATE) { best_tq = find_best_vis_server(bat_priv, info); - if (best_tq < 0) { - 
spin_unlock_bh(&bat_priv->orig_hash_lock); + if (best_tq < 0) return -1; - } } for (i = 0; i < hash->size; i++) { @@ -659,17 +656,12 @@ static int generate_vis_packet(struct bat_priv *bat_priv) entry->quality = neigh_node->tq_avg; packet->entries++; - if (vis_packet_full(info)) { - rcu_read_unlock(); - spin_unlock_bh(&bat_priv->orig_hash_lock); - return 0; - } + if (vis_packet_full(info)) + goto unlock; } rcu_read_unlock(); } - spin_unlock_bh(&bat_priv->orig_hash_lock); - hash = bat_priv->hna_local_hash; spin_lock_bh(&bat_priv->hna_lhash_lock); @@ -694,6 +686,10 @@ static int generate_vis_packet(struct bat_priv *bat_priv) spin_unlock_bh(&bat_priv->hna_lhash_lock); return 0; + +unlock: + rcu_read_unlock(); + return 0; } /* free old vis packets. Must be called with this vis_hash_lock @@ -739,7 +735,6 @@ static void broadcast_vis_packet(struct bat_priv *bat_priv, int i; - spin_lock_bh(&bat_priv->orig_hash_lock); packet = (struct vis_packet *)info->skb_packet->data; /* send to all routers in range. 
*/ @@ -762,18 +757,14 @@ static void broadcast_vis_packet(struct bat_priv *bat_priv, memcpy(packet->target_orig, orig_node->orig, ETH_ALEN); batman_if = orig_node->router->if_incoming; memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); - spin_unlock_bh(&bat_priv->orig_hash_lock); skb = skb_clone(info->skb_packet, GFP_ATOMIC); if (skb) send_skb_packet(skb, batman_if, dstaddr); - spin_lock_bh(&bat_priv->orig_hash_lock); } rcu_read_unlock(); } - - spin_unlock_bh(&bat_priv->orig_hash_lock); } static void unicast_vis_packet(struct bat_priv *bat_priv, @@ -783,12 +774,9 @@ static void unicast_vis_packet(struct bat_priv *bat_priv, struct neigh_node *neigh_node = NULL; struct sk_buff *skb; struct vis_packet *packet; - struct batman_if *batman_if; - uint8_t dstaddr[ETH_ALEN]; packet = (struct vis_packet *)info->skb_packet->data; - spin_lock_bh(&bat_priv->orig_hash_lock); rcu_read_lock(); orig_node = orig_hash_find(bat_priv, packet->target_orig); @@ -807,21 +795,15 @@ static void unicast_vis_packet(struct bat_priv *bat_priv, rcu_read_unlock(); - /* don't lock while sending the packets ... we therefore - * copy the required data before sending */ - batman_if = orig_node->router->if_incoming; - memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); - spin_unlock_bh(&bat_priv->orig_hash_lock); - skb = skb_clone(info->skb_packet, GFP_ATOMIC); if (skb) - send_skb_packet(skb, batman_if, dstaddr); + send_skb_packet(skb, neigh_node->if_incoming, + neigh_node->addr); goto out; unlock: rcu_read_unlock(); - spin_unlock_bh(&bat_priv->orig_hash_lock); out: if (neigh_node) neigh_node_free_ref(neigh_node); -- cgit v1.1 From 4389e47af856635eb17d03b2572a50576c12db24 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Fri, 18 Feb 2011 12:33:19 +0000 Subject: batman-adv: rename global if_list to hardif_list Batman-adv works with "hard interfaces" as well as "soft interfaces". The new name should better make clear which kind of interfaces this list stores. 
Signed-off-by: Marek Lindner --- net/batman-adv/hard-interface.c | 28 ++++++++++++++-------------- net/batman-adv/main.c | 6 +++--- net/batman-adv/main.h | 2 +- net/batman-adv/originator.c | 2 +- net/batman-adv/routing.c | 2 +- net/batman-adv/send.c | 4 ++-- 6 files changed, 22 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index de9bd36..4a2e6e3 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -31,8 +31,8 @@ #include -/* protect update critical side of if_list - but not the content */ -static DEFINE_SPINLOCK(if_list_lock); +/* protect update critical side of hardif_list - but not the content */ +static DEFINE_SPINLOCK(hardif_list_lock); static int batman_skb_recv(struct sk_buff *skb, @@ -54,7 +54,7 @@ struct batman_if *get_batman_if_by_netdev(struct net_device *net_dev) struct batman_if *batman_if; rcu_read_lock(); - list_for_each_entry_rcu(batman_if, &if_list, list) { + list_for_each_entry_rcu(batman_if, &hardif_list, list) { if (batman_if->net_dev == net_dev && atomic_inc_not_zero(&batman_if->refcount)) goto out; @@ -99,7 +99,7 @@ static struct batman_if *get_active_batman_if(struct net_device *soft_iface) struct batman_if *batman_if; rcu_read_lock(); - list_for_each_entry_rcu(batman_if, &if_list, list) { + list_for_each_entry_rcu(batman_if, &hardif_list, list) { if (batman_if->soft_iface != soft_iface) continue; @@ -179,7 +179,7 @@ static void check_known_mac_addr(struct net_device *net_dev) struct batman_if *batman_if; rcu_read_lock(); - list_for_each_entry_rcu(batman_if, &if_list, list) { + list_for_each_entry_rcu(batman_if, &hardif_list, list) { if ((batman_if->if_status != IF_ACTIVE) && (batman_if->if_status != IF_TO_BE_ACTIVATED)) continue; @@ -212,7 +212,7 @@ int hardif_min_mtu(struct net_device *soft_iface) goto out; rcu_read_lock(); - list_for_each_entry_rcu(batman_if, &if_list, list) { + list_for_each_entry_rcu(batman_if, &hardif_list, 
list) { if ((batman_if->if_status != IF_ACTIVE) && (batman_if->if_status != IF_TO_BE_ACTIVATED)) continue; @@ -449,9 +449,9 @@ static struct batman_if *hardif_add_interface(struct net_device *net_dev) check_known_mac_addr(batman_if->net_dev); - spin_lock(&if_list_lock); - list_add_tail_rcu(&batman_if->list, &if_list); - spin_unlock(&if_list_lock); + spin_lock(&hardif_list_lock); + list_add_tail_rcu(&batman_if->list, &hardif_list); + spin_unlock(&hardif_list_lock); return batman_if; @@ -484,12 +484,12 @@ void hardif_remove_interfaces(void) INIT_LIST_HEAD(&if_queue); - spin_lock(&if_list_lock); - list_for_each_entry_safe(batman_if, batman_if_tmp, &if_list, list) { + spin_lock(&hardif_list_lock); + list_for_each_entry_safe(batman_if, batman_if_tmp, &hardif_list, list) { list_del_rcu(&batman_if->list); list_add_tail(&batman_if->list, &if_queue); } - spin_unlock(&if_list_lock); + spin_unlock(&hardif_list_lock); rtnl_lock(); list_for_each_entry_safe(batman_if, batman_if_tmp, &if_queue, list) { @@ -520,9 +520,9 @@ static int hard_if_event(struct notifier_block *this, hardif_deactivate_interface(batman_if); break; case NETDEV_UNREGISTER: - spin_lock(&if_list_lock); + spin_lock(&hardif_list_lock); list_del_rcu(&batman_if->list); - spin_unlock(&if_list_lock); + spin_unlock(&hardif_list_lock); hardif_remove_interface(batman_if); break; diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 09c21f2..57aea9b 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -33,7 +33,7 @@ #include "vis.h" #include "hash.h" -struct list_head if_list; +struct list_head hardif_list; unsigned char broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; @@ -41,7 +41,7 @@ struct workqueue_struct *bat_event_workqueue; static int __init batman_init(void) { - INIT_LIST_HEAD(&if_list); + INIT_LIST_HEAD(&hardif_list); /* the name should not be longer than 10 chars - see * http://lwn.net/Articles/23634/ */ @@ -156,7 +156,7 @@ int is_my_mac(uint8_t *addr) struct batman_if 
*batman_if; rcu_read_lock(); - list_for_each_entry_rcu(batman_if, &if_list, list) { + list_for_each_entry_rcu(batman_if, &hardif_list, list) { if (batman_if->if_status != IF_ACTIVE) continue; diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 06b5b99..dc24869 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -122,7 +122,7 @@ #define REVISION_VERSION_STR " "REVISION_VERSION #endif -extern struct list_head if_list; +extern struct list_head hardif_list; extern unsigned char broadcast_addr[]; extern struct workqueue_struct *bat_event_workqueue; diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index a8d0262..84ef9ae 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -602,7 +602,7 @@ int orig_hash_del_if(struct batman_if *batman_if, int max_if_num) /* renumber remaining batman interfaces _inside_ of orig_hash_lock */ rcu_read_lock(); - list_for_each_entry_rcu(batman_if_tmp, &if_list, list) { + list_for_each_entry_rcu(batman_if_tmp, &hardif_list, list) { if (batman_if_tmp->if_status == IF_NOT_IN_USE) continue; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 3cfa2c7..21e93b3 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -643,7 +643,7 @@ void receive_bat_packet(struct ethhdr *ethhdr, has_directlink_flag); rcu_read_lock(); - list_for_each_entry_rcu(batman_if, &if_list, list) { + list_for_each_entry_rcu(batman_if, &hardif_list, list) { if (batman_if->if_status != IF_ACTIVE) continue; diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index f0232ad..c4f3e49 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -204,7 +204,7 @@ static void send_packet(struct forw_packet *forw_packet) /* broadcast on every interface */ rcu_read_lock(); - list_for_each_entry_rcu(batman_if, &if_list, list) { + list_for_each_entry_rcu(batman_if, &hardif_list, list) { if (batman_if->soft_iface != soft_iface) continue; @@ -461,7 +461,7 @@ static void 
send_outstanding_bcast_packet(struct work_struct *work) /* rebroadcast packet */ rcu_read_lock(); - list_for_each_entry_rcu(batman_if, &if_list, list) { + list_for_each_entry_rcu(batman_if, &hardif_list, list) { if (batman_if->soft_iface != soft_iface) continue; -- cgit v1.1 From e6c10f433af9c98994c94a10ae862c152fcfb2a9 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Fri, 18 Feb 2011 12:33:20 +0000 Subject: batman-adv: rename batman_if struct to hard_iface Signed-off-by: Marek Lindner --- net/batman-adv/aggregation.c | 8 +- net/batman-adv/aggregation.h | 4 +- net/batman-adv/bat_sysfs.c | 41 ++--- net/batman-adv/hard-interface.c | 353 ++++++++++++++++++++-------------------- net/batman-adv/hard-interface.h | 12 +- net/batman-adv/main.c | 8 +- net/batman-adv/originator.c | 28 ++-- net/batman-adv/originator.h | 6 +- net/batman-adv/routing.c | 48 +++--- net/batman-adv/routing.h | 20 +-- net/batman-adv/send.c | 101 ++++++------ net/batman-adv/send.h | 8 +- net/batman-adv/soft-interface.c | 2 +- net/batman-adv/soft-interface.h | 2 +- net/batman-adv/types.h | 8 +- net/batman-adv/unicast.c | 8 +- net/batman-adv/unicast.h | 2 +- net/batman-adv/vis.c | 6 +- 18 files changed, 335 insertions(+), 330 deletions(-) (limited to 'net') diff --git a/net/batman-adv/aggregation.c b/net/batman-adv/aggregation.c index 1997725..af45d6b 100644 --- a/net/batman-adv/aggregation.c +++ b/net/batman-adv/aggregation.c @@ -35,7 +35,7 @@ static bool can_aggregate_with(struct batman_packet *new_batman_packet, int packet_len, unsigned long send_time, bool directlink, - struct batman_if *if_incoming, + struct hard_iface *if_incoming, struct forw_packet *forw_packet) { struct batman_packet *batman_packet = @@ -99,7 +99,7 @@ static bool can_aggregate_with(struct batman_packet *new_batman_packet, /* create a new aggregated packet and add this packet to it */ static void new_aggregated_packet(unsigned char *packet_buff, int packet_len, unsigned long send_time, bool direct_link, - struct batman_if 
*if_incoming, + struct hard_iface *if_incoming, int own_packet) { struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); @@ -188,7 +188,7 @@ static void aggregate(struct forw_packet *forw_packet_aggr, void add_bat_packet_to_list(struct bat_priv *bat_priv, unsigned char *packet_buff, int packet_len, - struct batman_if *if_incoming, char own_packet, + struct hard_iface *if_incoming, char own_packet, unsigned long send_time) { /** @@ -247,7 +247,7 @@ void add_bat_packet_to_list(struct bat_priv *bat_priv, /* unpack the aggregated packets and process them one by one */ void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff, - int packet_len, struct batman_if *if_incoming) + int packet_len, struct hard_iface *if_incoming) { struct batman_packet *batman_packet; int buff_pos = 0; diff --git a/net/batman-adv/aggregation.h b/net/batman-adv/aggregation.h index 6ce305b..0622042 100644 --- a/net/batman-adv/aggregation.h +++ b/net/batman-adv/aggregation.h @@ -35,9 +35,9 @@ static inline int aggregated_packet(int buff_pos, int packet_len, int num_hna) void add_bat_packet_to_list(struct bat_priv *bat_priv, unsigned char *packet_buff, int packet_len, - struct batman_if *if_incoming, char own_packet, + struct hard_iface *if_incoming, char own_packet, unsigned long send_time); void receive_aggr_bat_packet(struct ethhdr *ethhdr, unsigned char *packet_buff, - int packet_len, struct batman_if *if_incoming); + int packet_len, struct hard_iface *if_incoming); #endif /* _NET_BATMAN_ADV_AGGREGATION_H_ */ diff --git a/net/batman-adv/bat_sysfs.c b/net/batman-adv/bat_sysfs.c index 93ae20a..e449bf6 100644 --- a/net/batman-adv/bat_sysfs.c +++ b/net/batman-adv/bat_sysfs.c @@ -441,16 +441,16 @@ static ssize_t show_mesh_iface(struct kobject *kobj, struct attribute *attr, char *buff) { struct net_device *net_dev = kobj_to_netdev(kobj); - struct batman_if *batman_if = get_batman_if_by_netdev(net_dev); + struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev); 
ssize_t length; - if (!batman_if) + if (!hard_iface) return 0; - length = sprintf(buff, "%s\n", batman_if->if_status == IF_NOT_IN_USE ? - "none" : batman_if->soft_iface->name); + length = sprintf(buff, "%s\n", hard_iface->if_status == IF_NOT_IN_USE ? + "none" : hard_iface->soft_iface->name); - hardif_free_ref(batman_if); + hardif_free_ref(hard_iface); return length; } @@ -459,11 +459,11 @@ static ssize_t store_mesh_iface(struct kobject *kobj, struct attribute *attr, char *buff, size_t count) { struct net_device *net_dev = kobj_to_netdev(kobj); - struct batman_if *batman_if = get_batman_if_by_netdev(net_dev); + struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev); int status_tmp = -1; int ret = count; - if (!batman_if) + if (!hard_iface) return count; if (buff[count - 1] == '\n') @@ -472,7 +472,7 @@ static ssize_t store_mesh_iface(struct kobject *kobj, struct attribute *attr, if (strlen(buff) >= IFNAMSIZ) { pr_err("Invalid parameter for 'mesh_iface' setting received: " "interface name too long '%s'\n", buff); - hardif_free_ref(batman_if); + hardif_free_ref(hard_iface); return -EINVAL; } @@ -481,28 +481,31 @@ static ssize_t store_mesh_iface(struct kobject *kobj, struct attribute *attr, else status_tmp = IF_I_WANT_YOU; - if ((batman_if->if_status == status_tmp) || ((batman_if->soft_iface) && - (strncmp(batman_if->soft_iface->name, buff, IFNAMSIZ) == 0))) + if (hard_iface->if_status == status_tmp) + goto out; + + if ((hard_iface->soft_iface) && + (strncmp(hard_iface->soft_iface->name, buff, IFNAMSIZ) == 0)) goto out; if (status_tmp == IF_NOT_IN_USE) { rtnl_lock(); - hardif_disable_interface(batman_if); + hardif_disable_interface(hard_iface); rtnl_unlock(); goto out; } /* if the interface already is in use */ - if (batman_if->if_status != IF_NOT_IN_USE) { + if (hard_iface->if_status != IF_NOT_IN_USE) { rtnl_lock(); - hardif_disable_interface(batman_if); + hardif_disable_interface(hard_iface); rtnl_unlock(); } - ret = hardif_enable_interface(batman_if, buff); + 
ret = hardif_enable_interface(hard_iface, buff); out: - hardif_free_ref(batman_if); + hardif_free_ref(hard_iface); return ret; } @@ -510,13 +513,13 @@ static ssize_t show_iface_status(struct kobject *kobj, struct attribute *attr, char *buff) { struct net_device *net_dev = kobj_to_netdev(kobj); - struct batman_if *batman_if = get_batman_if_by_netdev(net_dev); + struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev); ssize_t length; - if (!batman_if) + if (!hard_iface) return 0; - switch (batman_if->if_status) { + switch (hard_iface->if_status) { case IF_TO_BE_REMOVED: length = sprintf(buff, "disabling\n"); break; @@ -535,7 +538,7 @@ static ssize_t show_iface_status(struct kobject *kobj, struct attribute *attr, break; } - hardif_free_ref(batman_if); + hardif_free_ref(hard_iface); return length; } diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 4a2e6e3..95a35b6 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -42,29 +42,29 @@ static int batman_skb_recv(struct sk_buff *skb, void hardif_free_rcu(struct rcu_head *rcu) { - struct batman_if *batman_if; + struct hard_iface *hard_iface; - batman_if = container_of(rcu, struct batman_if, rcu); - dev_put(batman_if->net_dev); - kfree(batman_if); + hard_iface = container_of(rcu, struct hard_iface, rcu); + dev_put(hard_iface->net_dev); + kfree(hard_iface); } -struct batman_if *get_batman_if_by_netdev(struct net_device *net_dev) +struct hard_iface *hardif_get_by_netdev(struct net_device *net_dev) { - struct batman_if *batman_if; + struct hard_iface *hard_iface; rcu_read_lock(); - list_for_each_entry_rcu(batman_if, &hardif_list, list) { - if (batman_if->net_dev == net_dev && - atomic_inc_not_zero(&batman_if->refcount)) + list_for_each_entry_rcu(hard_iface, &hardif_list, list) { + if (hard_iface->net_dev == net_dev && + atomic_inc_not_zero(&hard_iface->refcount)) goto out; } - batman_if = NULL; + hard_iface = NULL; out: rcu_read_unlock(); - return 
batman_if; + return hard_iface; } static int is_valid_iface(struct net_device *net_dev) @@ -94,25 +94,25 @@ static int is_valid_iface(struct net_device *net_dev) return 1; } -static struct batman_if *get_active_batman_if(struct net_device *soft_iface) +static struct hard_iface *hardif_get_active(struct net_device *soft_iface) { - struct batman_if *batman_if; + struct hard_iface *hard_iface; rcu_read_lock(); - list_for_each_entry_rcu(batman_if, &hardif_list, list) { - if (batman_if->soft_iface != soft_iface) + list_for_each_entry_rcu(hard_iface, &hardif_list, list) { + if (hard_iface->soft_iface != soft_iface) continue; - if (batman_if->if_status == IF_ACTIVE && - atomic_inc_not_zero(&batman_if->refcount)) + if (hard_iface->if_status == IF_ACTIVE && + atomic_inc_not_zero(&hard_iface->refcount)) goto out; } - batman_if = NULL; + hard_iface = NULL; out: rcu_read_unlock(); - return batman_if; + return hard_iface; } static void update_primary_addr(struct bat_priv *bat_priv) @@ -128,16 +128,16 @@ static void update_primary_addr(struct bat_priv *bat_priv) } static void set_primary_if(struct bat_priv *bat_priv, - struct batman_if *batman_if) + struct hard_iface *hard_iface) { struct batman_packet *batman_packet; - struct batman_if *old_if; + struct hard_iface *old_if; - if (batman_if && !atomic_inc_not_zero(&batman_if->refcount)) - batman_if = NULL; + if (hard_iface && !atomic_inc_not_zero(&hard_iface->refcount)) + hard_iface = NULL; old_if = bat_priv->primary_if; - bat_priv->primary_if = batman_if; + bat_priv->primary_if = hard_iface; if (old_if) hardif_free_ref(old_if); @@ -145,7 +145,7 @@ static void set_primary_if(struct bat_priv *bat_priv, if (!bat_priv->primary_if) return; - batman_packet = (struct batman_packet *)(batman_if->packet_buff); + batman_packet = (struct batman_packet *)(hard_iface->packet_buff); batman_packet->flags = PRIMARIES_FIRST_HOP; batman_packet->ttl = TTL; @@ -158,42 +158,42 @@ static void set_primary_if(struct bat_priv *bat_priv, 
atomic_set(&bat_priv->hna_local_changed, 1); } -static bool hardif_is_iface_up(struct batman_if *batman_if) +static bool hardif_is_iface_up(struct hard_iface *hard_iface) { - if (batman_if->net_dev->flags & IFF_UP) + if (hard_iface->net_dev->flags & IFF_UP) return true; return false; } -static void update_mac_addresses(struct batman_if *batman_if) +static void update_mac_addresses(struct hard_iface *hard_iface) { - memcpy(((struct batman_packet *)(batman_if->packet_buff))->orig, - batman_if->net_dev->dev_addr, ETH_ALEN); - memcpy(((struct batman_packet *)(batman_if->packet_buff))->prev_sender, - batman_if->net_dev->dev_addr, ETH_ALEN); + memcpy(((struct batman_packet *)(hard_iface->packet_buff))->orig, + hard_iface->net_dev->dev_addr, ETH_ALEN); + memcpy(((struct batman_packet *)(hard_iface->packet_buff))->prev_sender, + hard_iface->net_dev->dev_addr, ETH_ALEN); } static void check_known_mac_addr(struct net_device *net_dev) { - struct batman_if *batman_if; + struct hard_iface *hard_iface; rcu_read_lock(); - list_for_each_entry_rcu(batman_if, &hardif_list, list) { - if ((batman_if->if_status != IF_ACTIVE) && - (batman_if->if_status != IF_TO_BE_ACTIVATED)) + list_for_each_entry_rcu(hard_iface, &hardif_list, list) { + if ((hard_iface->if_status != IF_ACTIVE) && + (hard_iface->if_status != IF_TO_BE_ACTIVATED)) continue; - if (batman_if->net_dev == net_dev) + if (hard_iface->net_dev == net_dev) continue; - if (!compare_eth(batman_if->net_dev->dev_addr, - net_dev->dev_addr)) + if (!compare_eth(hard_iface->net_dev->dev_addr, + net_dev->dev_addr)) continue; pr_warning("The newly added mac address (%pM) already exists " "on: %s\n", net_dev->dev_addr, - batman_if->net_dev->name); + hard_iface->net_dev->name); pr_warning("It is strongly recommended to keep mac addresses " "unique to avoid problems!\n"); } @@ -203,7 +203,7 @@ static void check_known_mac_addr(struct net_device *net_dev) int hardif_min_mtu(struct net_device *soft_iface) { struct bat_priv *bat_priv = 
netdev_priv(soft_iface); - struct batman_if *batman_if; + struct hard_iface *hard_iface; /* allow big frames if all devices are capable to do so * (have MTU > 1500 + BAT_HEADER_LEN) */ int min_mtu = ETH_DATA_LEN; @@ -212,15 +212,15 @@ int hardif_min_mtu(struct net_device *soft_iface) goto out; rcu_read_lock(); - list_for_each_entry_rcu(batman_if, &hardif_list, list) { - if ((batman_if->if_status != IF_ACTIVE) && - (batman_if->if_status != IF_TO_BE_ACTIVATED)) + list_for_each_entry_rcu(hard_iface, &hardif_list, list) { + if ((hard_iface->if_status != IF_ACTIVE) && + (hard_iface->if_status != IF_TO_BE_ACTIVATED)) continue; - if (batman_if->soft_iface != soft_iface) + if (hard_iface->soft_iface != soft_iface) continue; - min_mtu = min_t(int, batman_if->net_dev->mtu - BAT_HEADER_LEN, + min_mtu = min_t(int, hard_iface->net_dev->mtu - BAT_HEADER_LEN, min_mtu); } rcu_read_unlock(); @@ -238,80 +238,80 @@ void update_min_mtu(struct net_device *soft_iface) soft_iface->mtu = min_mtu; } -static void hardif_activate_interface(struct batman_if *batman_if) +static void hardif_activate_interface(struct hard_iface *hard_iface) { struct bat_priv *bat_priv; - if (batman_if->if_status != IF_INACTIVE) + if (hard_iface->if_status != IF_INACTIVE) return; - bat_priv = netdev_priv(batman_if->soft_iface); + bat_priv = netdev_priv(hard_iface->soft_iface); - update_mac_addresses(batman_if); - batman_if->if_status = IF_TO_BE_ACTIVATED; + update_mac_addresses(hard_iface); + hard_iface->if_status = IF_TO_BE_ACTIVATED; /** * the first active interface becomes our primary interface or * the next active interface after the old primay interface was removed */ if (!bat_priv->primary_if) - set_primary_if(bat_priv, batman_if); + set_primary_if(bat_priv, hard_iface); - bat_info(batman_if->soft_iface, "Interface activated: %s\n", - batman_if->net_dev->name); + bat_info(hard_iface->soft_iface, "Interface activated: %s\n", + hard_iface->net_dev->name); - update_min_mtu(batman_if->soft_iface); + 
update_min_mtu(hard_iface->soft_iface); return; } -static void hardif_deactivate_interface(struct batman_if *batman_if) +static void hardif_deactivate_interface(struct hard_iface *hard_iface) { - if ((batman_if->if_status != IF_ACTIVE) && - (batman_if->if_status != IF_TO_BE_ACTIVATED)) + if ((hard_iface->if_status != IF_ACTIVE) && + (hard_iface->if_status != IF_TO_BE_ACTIVATED)) return; - batman_if->if_status = IF_INACTIVE; + hard_iface->if_status = IF_INACTIVE; - bat_info(batman_if->soft_iface, "Interface deactivated: %s\n", - batman_if->net_dev->name); + bat_info(hard_iface->soft_iface, "Interface deactivated: %s\n", + hard_iface->net_dev->name); - update_min_mtu(batman_if->soft_iface); + update_min_mtu(hard_iface->soft_iface); } -int hardif_enable_interface(struct batman_if *batman_if, char *iface_name) +int hardif_enable_interface(struct hard_iface *hard_iface, char *iface_name) { struct bat_priv *bat_priv; struct batman_packet *batman_packet; - if (batman_if->if_status != IF_NOT_IN_USE) + if (hard_iface->if_status != IF_NOT_IN_USE) goto out; - if (!atomic_inc_not_zero(&batman_if->refcount)) + if (!atomic_inc_not_zero(&hard_iface->refcount)) goto out; - batman_if->soft_iface = dev_get_by_name(&init_net, iface_name); + hard_iface->soft_iface = dev_get_by_name(&init_net, iface_name); - if (!batman_if->soft_iface) { - batman_if->soft_iface = softif_create(iface_name); + if (!hard_iface->soft_iface) { + hard_iface->soft_iface = softif_create(iface_name); - if (!batman_if->soft_iface) + if (!hard_iface->soft_iface) goto err; /* dev_get_by_name() increases the reference counter for us */ - dev_hold(batman_if->soft_iface); + dev_hold(hard_iface->soft_iface); } - bat_priv = netdev_priv(batman_if->soft_iface); - batman_if->packet_len = BAT_PACKET_LEN; - batman_if->packet_buff = kmalloc(batman_if->packet_len, GFP_ATOMIC); + bat_priv = netdev_priv(hard_iface->soft_iface); + hard_iface->packet_len = BAT_PACKET_LEN; + hard_iface->packet_buff = 
kmalloc(hard_iface->packet_len, GFP_ATOMIC); - if (!batman_if->packet_buff) { - bat_err(batman_if->soft_iface, "Can't add interface packet " - "(%s): out of memory\n", batman_if->net_dev->name); + if (!hard_iface->packet_buff) { + bat_err(hard_iface->soft_iface, "Can't add interface packet " + "(%s): out of memory\n", hard_iface->net_dev->name); goto err; } - batman_packet = (struct batman_packet *)(batman_if->packet_buff); + batman_packet = (struct batman_packet *)(hard_iface->packet_buff); batman_packet->packet_type = BAT_PACKET; batman_packet->version = COMPAT_VERSION; batman_packet->flags = 0; @@ -319,107 +319,107 @@ int hardif_enable_interface(struct batman_if *batman_if, char *iface_name) batman_packet->tq = TQ_MAX_VALUE; batman_packet->num_hna = 0; - batman_if->if_num = bat_priv->num_ifaces; + hard_iface->if_num = bat_priv->num_ifaces; bat_priv->num_ifaces++; - batman_if->if_status = IF_INACTIVE; - orig_hash_add_if(batman_if, bat_priv->num_ifaces); + hard_iface->if_status = IF_INACTIVE; + orig_hash_add_if(hard_iface, bat_priv->num_ifaces); - batman_if->batman_adv_ptype.type = __constant_htons(ETH_P_BATMAN); - batman_if->batman_adv_ptype.func = batman_skb_recv; - batman_if->batman_adv_ptype.dev = batman_if->net_dev; - dev_add_pack(&batman_if->batman_adv_ptype); + hard_iface->batman_adv_ptype.type = __constant_htons(ETH_P_BATMAN); + hard_iface->batman_adv_ptype.func = batman_skb_recv; + hard_iface->batman_adv_ptype.dev = hard_iface->net_dev; + dev_add_pack(&hard_iface->batman_adv_ptype); - atomic_set(&batman_if->seqno, 1); - atomic_set(&batman_if->frag_seqno, 1); - bat_info(batman_if->soft_iface, "Adding interface: %s\n", - batman_if->net_dev->name); + atomic_set(&hard_iface->seqno, 1); + atomic_set(&hard_iface->frag_seqno, 1); + bat_info(hard_iface->soft_iface, "Adding interface: %s\n", + hard_iface->net_dev->name); - if (atomic_read(&bat_priv->fragmentation) && batman_if->net_dev->mtu < + if (atomic_read(&bat_priv->fragmentation) && hard_iface->net_dev->mtu 
< ETH_DATA_LEN + BAT_HEADER_LEN) - bat_info(batman_if->soft_iface, + bat_info(hard_iface->soft_iface, "The MTU of interface %s is too small (%i) to handle " "the transport of batman-adv packets. Packets going " "over this interface will be fragmented on layer2 " "which could impact the performance. Setting the MTU " "to %zi would solve the problem.\n", - batman_if->net_dev->name, batman_if->net_dev->mtu, + hard_iface->net_dev->name, hard_iface->net_dev->mtu, ETH_DATA_LEN + BAT_HEADER_LEN); - if (!atomic_read(&bat_priv->fragmentation) && batman_if->net_dev->mtu < + if (!atomic_read(&bat_priv->fragmentation) && hard_iface->net_dev->mtu < ETH_DATA_LEN + BAT_HEADER_LEN) - bat_info(batman_if->soft_iface, + bat_info(hard_iface->soft_iface, "The MTU of interface %s is too small (%i) to handle " "the transport of batman-adv packets. If you experience" " problems getting traffic through try increasing the " "MTU to %zi.\n", - batman_if->net_dev->name, batman_if->net_dev->mtu, + hard_iface->net_dev->name, hard_iface->net_dev->mtu, ETH_DATA_LEN + BAT_HEADER_LEN); - if (hardif_is_iface_up(batman_if)) - hardif_activate_interface(batman_if); + if (hardif_is_iface_up(hard_iface)) + hardif_activate_interface(hard_iface); else - bat_err(batman_if->soft_iface, "Not using interface %s " + bat_err(hard_iface->soft_iface, "Not using interface %s " "(retrying later): interface not active\n", - batman_if->net_dev->name); + hard_iface->net_dev->name); /* begin scheduling originator messages on that interface */ - schedule_own_packet(batman_if); + schedule_own_packet(hard_iface); out: return 0; err: - hardif_free_ref(batman_if); + hardif_free_ref(hard_iface); return -ENOMEM; } -void hardif_disable_interface(struct batman_if *batman_if) +void hardif_disable_interface(struct hard_iface *hard_iface) { - struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); + struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface); - if (batman_if->if_status == IF_ACTIVE) - 
hardif_deactivate_interface(batman_if); + if (hard_iface->if_status == IF_ACTIVE) + hardif_deactivate_interface(hard_iface); - if (batman_if->if_status != IF_INACTIVE) + if (hard_iface->if_status != IF_INACTIVE) return; - bat_info(batman_if->soft_iface, "Removing interface: %s\n", - batman_if->net_dev->name); - dev_remove_pack(&batman_if->batman_adv_ptype); + bat_info(hard_iface->soft_iface, "Removing interface: %s\n", + hard_iface->net_dev->name); + dev_remove_pack(&hard_iface->batman_adv_ptype); bat_priv->num_ifaces--; - orig_hash_del_if(batman_if, bat_priv->num_ifaces); + orig_hash_del_if(hard_iface, bat_priv->num_ifaces); - if (batman_if == bat_priv->primary_if) { - struct batman_if *new_if; + if (hard_iface == bat_priv->primary_if) { + struct hard_iface *new_if; - new_if = get_active_batman_if(batman_if->soft_iface); + new_if = hardif_get_active(hard_iface->soft_iface); set_primary_if(bat_priv, new_if); if (new_if) hardif_free_ref(new_if); } - kfree(batman_if->packet_buff); - batman_if->packet_buff = NULL; - batman_if->if_status = IF_NOT_IN_USE; + kfree(hard_iface->packet_buff); + hard_iface->packet_buff = NULL; + hard_iface->if_status = IF_NOT_IN_USE; - /* delete all references to this batman_if */ + /* delete all references to this hard_iface */ purge_orig_ref(bat_priv); - purge_outstanding_packets(bat_priv, batman_if); - dev_put(batman_if->soft_iface); + purge_outstanding_packets(bat_priv, hard_iface); + dev_put(hard_iface->soft_iface); /* nobody uses this interface anymore */ if (!bat_priv->num_ifaces) - softif_destroy(batman_if->soft_iface); + softif_destroy(hard_iface->soft_iface); - batman_if->soft_iface = NULL; - hardif_free_ref(batman_if); + hard_iface->soft_iface = NULL; + hardif_free_ref(hard_iface); } -static struct batman_if *hardif_add_interface(struct net_device *net_dev) +static struct hard_iface *hardif_add_interface(struct net_device *net_dev) { - struct batman_if *batman_if; + struct hard_iface *hard_iface; int ret; ret = 
is_valid_iface(net_dev); @@ -428,72 +428,73 @@ static struct batman_if *hardif_add_interface(struct net_device *net_dev) dev_hold(net_dev); - batman_if = kmalloc(sizeof(struct batman_if), GFP_ATOMIC); - if (!batman_if) { + hard_iface = kmalloc(sizeof(struct hard_iface), GFP_ATOMIC); + if (!hard_iface) { pr_err("Can't add interface (%s): out of memory\n", net_dev->name); goto release_dev; } - ret = sysfs_add_hardif(&batman_if->hardif_obj, net_dev); + ret = sysfs_add_hardif(&hard_iface->hardif_obj, net_dev); if (ret) goto free_if; - batman_if->if_num = -1; - batman_if->net_dev = net_dev; - batman_if->soft_iface = NULL; - batman_if->if_status = IF_NOT_IN_USE; - INIT_LIST_HEAD(&batman_if->list); + hard_iface->if_num = -1; + hard_iface->net_dev = net_dev; + hard_iface->soft_iface = NULL; + hard_iface->if_status = IF_NOT_IN_USE; + INIT_LIST_HEAD(&hard_iface->list); /* extra reference for return */ - atomic_set(&batman_if->refcount, 2); + atomic_set(&hard_iface->refcount, 2); - check_known_mac_addr(batman_if->net_dev); + check_known_mac_addr(hard_iface->net_dev); spin_lock(&hardif_list_lock); - list_add_tail_rcu(&batman_if->list, &hardif_list); + list_add_tail_rcu(&hard_iface->list, &hardif_list); spin_unlock(&hardif_list_lock); - return batman_if; + return hard_iface; free_if: - kfree(batman_if); + kfree(hard_iface); release_dev: dev_put(net_dev); out: return NULL; } -static void hardif_remove_interface(struct batman_if *batman_if) +static void hardif_remove_interface(struct hard_iface *hard_iface) { /* first deactivate interface */ - if (batman_if->if_status != IF_NOT_IN_USE) - hardif_disable_interface(batman_if); + if (hard_iface->if_status != IF_NOT_IN_USE) + hardif_disable_interface(hard_iface); - if (batman_if->if_status != IF_NOT_IN_USE) + if (hard_iface->if_status != IF_NOT_IN_USE) return; - batman_if->if_status = IF_TO_BE_REMOVED; - sysfs_del_hardif(&batman_if->hardif_obj); - hardif_free_ref(batman_if); + hard_iface->if_status = IF_TO_BE_REMOVED; + 
sysfs_del_hardif(&hard_iface->hardif_obj); + hardif_free_ref(hard_iface); } void hardif_remove_interfaces(void) { - struct batman_if *batman_if, *batman_if_tmp; + struct hard_iface *hard_iface, *hard_iface_tmp; struct list_head if_queue; INIT_LIST_HEAD(&if_queue); spin_lock(&hardif_list_lock); - list_for_each_entry_safe(batman_if, batman_if_tmp, &hardif_list, list) { - list_del_rcu(&batman_if->list); - list_add_tail(&batman_if->list, &if_queue); + list_for_each_entry_safe(hard_iface, hard_iface_tmp, + &hardif_list, list) { + list_del_rcu(&hard_iface->list); + list_add_tail(&hard_iface->list, &if_queue); } spin_unlock(&hardif_list_lock); rtnl_lock(); - list_for_each_entry_safe(batman_if, batman_if_tmp, &if_queue, list) { - hardif_remove_interface(batman_if); + list_for_each_entry_safe(hard_iface, hard_iface_tmp, &if_queue, list) { + hardif_remove_interface(hard_iface); } rtnl_unlock(); } @@ -502,43 +503,43 @@ static int hard_if_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *net_dev = (struct net_device *)ptr; - struct batman_if *batman_if = get_batman_if_by_netdev(net_dev); + struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev); struct bat_priv *bat_priv; - if (!batman_if && event == NETDEV_REGISTER) - batman_if = hardif_add_interface(net_dev); + if (!hard_iface && event == NETDEV_REGISTER) + hard_iface = hardif_add_interface(net_dev); - if (!batman_if) + if (!hard_iface) goto out; switch (event) { case NETDEV_UP: - hardif_activate_interface(batman_if); + hardif_activate_interface(hard_iface); break; case NETDEV_GOING_DOWN: case NETDEV_DOWN: - hardif_deactivate_interface(batman_if); + hardif_deactivate_interface(hard_iface); break; case NETDEV_UNREGISTER: spin_lock(&hardif_list_lock); - list_del_rcu(&batman_if->list); + list_del_rcu(&hard_iface->list); spin_unlock(&hardif_list_lock); - hardif_remove_interface(batman_if); + hardif_remove_interface(hard_iface); break; case NETDEV_CHANGEMTU: - if (batman_if->soft_iface) 
- update_min_mtu(batman_if->soft_iface); + if (hard_iface->soft_iface) + update_min_mtu(hard_iface->soft_iface); break; case NETDEV_CHANGEADDR: - if (batman_if->if_status == IF_NOT_IN_USE) + if (hard_iface->if_status == IF_NOT_IN_USE) goto hardif_put; - check_known_mac_addr(batman_if->net_dev); - update_mac_addresses(batman_if); + check_known_mac_addr(hard_iface->net_dev); + update_mac_addresses(hard_iface); - bat_priv = netdev_priv(batman_if->soft_iface); - if (batman_if == bat_priv->primary_if) + bat_priv = netdev_priv(hard_iface->soft_iface); + if (hard_iface == bat_priv->primary_if) update_primary_addr(bat_priv); break; default: @@ -546,7 +547,7 @@ static int hard_if_event(struct notifier_block *this, }; hardif_put: - hardif_free_ref(batman_if); + hardif_free_ref(hard_iface); out: return NOTIFY_DONE; } @@ -559,10 +560,10 @@ static int batman_skb_recv(struct sk_buff *skb, struct net_device *dev, { struct bat_priv *bat_priv; struct batman_packet *batman_packet; - struct batman_if *batman_if; + struct hard_iface *hard_iface; int ret; - batman_if = container_of(ptype, struct batman_if, batman_adv_ptype); + hard_iface = container_of(ptype, struct hard_iface, batman_adv_ptype); skb = skb_share_check(skb, GFP_ATOMIC); /* skb was released by skb_share_check() */ @@ -578,16 +579,16 @@ static int batman_skb_recv(struct sk_buff *skb, struct net_device *dev, || !skb_mac_header(skb))) goto err_free; - if (!batman_if->soft_iface) + if (!hard_iface->soft_iface) goto err_free; - bat_priv = netdev_priv(batman_if->soft_iface); + bat_priv = netdev_priv(hard_iface->soft_iface); if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE) goto err_free; /* discard frames on not active interfaces */ - if (batman_if->if_status != IF_ACTIVE) + if (hard_iface->if_status != IF_ACTIVE) goto err_free; batman_packet = (struct batman_packet *)skb->data; @@ -605,32 +606,32 @@ static int batman_skb_recv(struct sk_buff *skb, struct net_device *dev, switch (batman_packet->packet_type) { /* batman 
originator packet */ case BAT_PACKET: - ret = recv_bat_packet(skb, batman_if); + ret = recv_bat_packet(skb, hard_iface); break; /* batman icmp packet */ case BAT_ICMP: - ret = recv_icmp_packet(skb, batman_if); + ret = recv_icmp_packet(skb, hard_iface); break; /* unicast packet */ case BAT_UNICAST: - ret = recv_unicast_packet(skb, batman_if); + ret = recv_unicast_packet(skb, hard_iface); break; /* fragmented unicast packet */ case BAT_UNICAST_FRAG: - ret = recv_ucast_frag_packet(skb, batman_if); + ret = recv_ucast_frag_packet(skb, hard_iface); break; /* broadcast packet */ case BAT_BCAST: - ret = recv_bcast_packet(skb, batman_if); + ret = recv_bcast_packet(skb, hard_iface); break; /* vis packet */ case BAT_VIS: - ret = recv_vis_packet(skb, batman_if); + ret = recv_vis_packet(skb, hard_iface); break; default: ret = NET_RX_DROP; diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index e488b90..a9ddf36 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -31,18 +31,18 @@ extern struct notifier_block hard_if_notifier; -struct batman_if *get_batman_if_by_netdev(struct net_device *net_dev); -int hardif_enable_interface(struct batman_if *batman_if, char *iface_name); -void hardif_disable_interface(struct batman_if *batman_if); +struct hard_iface *hardif_get_by_netdev(struct net_device *net_dev); +int hardif_enable_interface(struct hard_iface *hard_iface, char *iface_name); +void hardif_disable_interface(struct hard_iface *hard_iface); void hardif_remove_interfaces(void); int hardif_min_mtu(struct net_device *soft_iface); void update_min_mtu(struct net_device *soft_iface); void hardif_free_rcu(struct rcu_head *rcu); -static inline void hardif_free_ref(struct batman_if *batman_if) +static inline void hardif_free_ref(struct hard_iface *hard_iface) { - if (atomic_dec_and_test(&batman_if->refcount)) - call_rcu(&batman_if->rcu, hardif_free_rcu); + if (atomic_dec_and_test(&hard_iface->refcount)) + 
call_rcu(&hard_iface->rcu, hardif_free_rcu); } #endif /* _NET_BATMAN_ADV_HARD_INTERFACE_H_ */ diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 57aea9b..709b33b 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -153,14 +153,14 @@ void dec_module_count(void) int is_my_mac(uint8_t *addr) { - struct batman_if *batman_if; + struct hard_iface *hard_iface; rcu_read_lock(); - list_for_each_entry_rcu(batman_if, &hardif_list, list) { - if (batman_if->if_status != IF_ACTIVE) + list_for_each_entry_rcu(hard_iface, &hardif_list, list) { + if (hard_iface->if_status != IF_ACTIVE) continue; - if (compare_eth(batman_if->net_dev->dev_addr, addr)) { + if (compare_eth(hard_iface->net_dev->dev_addr, addr)) { rcu_read_unlock(); return 1; } diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 84ef9ae..0b91330 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -73,7 +73,7 @@ void neigh_node_free_ref(struct neigh_node *neigh_node) struct neigh_node *create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node, uint8_t *neigh, - struct batman_if *if_incoming) + struct hard_iface *if_incoming) { struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct neigh_node *neigh_node; @@ -487,9 +487,9 @@ static int orig_node_add_if(struct orig_node *orig_node, int max_if_num) return 0; } -int orig_hash_add_if(struct batman_if *batman_if, int max_if_num) +int orig_hash_add_if(struct hard_iface *hard_iface, int max_if_num) { - struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); + struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct hashtable_t *hash = bat_priv->orig_hash; struct hlist_node *node; struct hlist_head *head; @@ -572,13 +572,13 @@ free_own_sum: return 0; } -int orig_hash_del_if(struct batman_if *batman_if, int max_if_num) +int orig_hash_del_if(struct hard_iface *hard_iface, int max_if_num) { - struct bat_priv *bat_priv = 
netdev_priv(batman_if->soft_iface); + struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct hashtable_t *hash = bat_priv->orig_hash; struct hlist_node *node; struct hlist_head *head; - struct batman_if *batman_if_tmp; + struct hard_iface *hard_iface_tmp; struct orig_node *orig_node; int i, ret; @@ -591,7 +591,7 @@ int orig_hash_del_if(struct batman_if *batman_if, int max_if_num) hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { spin_lock_bh(&orig_node->ogm_cnt_lock); ret = orig_node_del_if(orig_node, max_if_num, - batman_if->if_num); + hard_iface->if_num); spin_unlock_bh(&orig_node->ogm_cnt_lock); if (ret == -1) @@ -602,22 +602,22 @@ int orig_hash_del_if(struct batman_if *batman_if, int max_if_num) /* renumber remaining batman interfaces _inside_ of orig_hash_lock */ rcu_read_lock(); - list_for_each_entry_rcu(batman_if_tmp, &hardif_list, list) { - if (batman_if_tmp->if_status == IF_NOT_IN_USE) + list_for_each_entry_rcu(hard_iface_tmp, &hardif_list, list) { + if (hard_iface_tmp->if_status == IF_NOT_IN_USE) continue; - if (batman_if == batman_if_tmp) + if (hard_iface == hard_iface_tmp) continue; - if (batman_if->soft_iface != batman_if_tmp->soft_iface) + if (hard_iface->soft_iface != hard_iface_tmp->soft_iface) continue; - if (batman_if_tmp->if_num > batman_if->if_num) - batman_if_tmp->if_num--; + if (hard_iface_tmp->if_num > hard_iface->if_num) + hard_iface_tmp->if_num--; } rcu_read_unlock(); - batman_if->if_num = -1; + hard_iface->if_num = -1; return 0; err: diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 3d7a39d..5cc0110 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -32,11 +32,11 @@ struct orig_node *get_orig_node(struct bat_priv *bat_priv, uint8_t *addr); struct neigh_node *create_neighbor(struct orig_node *orig_node, struct orig_node *orig_neigh_node, uint8_t *neigh, - struct batman_if *if_incoming); + struct hard_iface *if_incoming); void neigh_node_free_ref(struct 
neigh_node *neigh_node); int orig_seq_print_text(struct seq_file *seq, void *offset); -int orig_hash_add_if(struct batman_if *batman_if, int max_if_num); -int orig_hash_del_if(struct batman_if *batman_if, int max_if_num); +int orig_hash_add_if(struct hard_iface *hard_iface, int max_if_num); +int orig_hash_del_if(struct hard_iface *hard_iface, int max_if_num); /* returns 1 if they are the same originator */ diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 21e93b3..42cb6e2 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -35,9 +35,9 @@ #include "gateway_client.h" #include "unicast.h" -void slide_own_bcast_window(struct batman_if *batman_if) +void slide_own_bcast_window(struct hard_iface *hard_iface) { - struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); + struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct hashtable_t *hash = bat_priv->orig_hash; struct hlist_node *node; struct hlist_head *head; @@ -52,11 +52,11 @@ void slide_own_bcast_window(struct batman_if *batman_if) rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { spin_lock_bh(&orig_node->ogm_cnt_lock); - word_index = batman_if->if_num * NUM_WORDS; + word_index = hard_iface->if_num * NUM_WORDS; word = &(orig_node->bcast_own[word_index]); bit_get_packet(bat_priv, word, 1, 0); - orig_node->bcast_own_sum[batman_if->if_num] = + orig_node->bcast_own_sum[hard_iface->if_num] = bit_packet_count(word); spin_unlock_bh(&orig_node->ogm_cnt_lock); } @@ -143,7 +143,7 @@ void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node, static int is_bidirectional_neigh(struct orig_node *orig_node, struct orig_node *orig_neigh_node, struct batman_packet *batman_packet, - struct batman_if *if_incoming) + struct hard_iface *if_incoming) { struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct neigh_node *neigh_node = NULL, *tmp_neigh_node; @@ -368,7 +368,7 @@ static void update_orig(struct 
bat_priv *bat_priv, struct orig_node *orig_node, struct ethhdr *ethhdr, struct batman_packet *batman_packet, - struct batman_if *if_incoming, + struct hard_iface *if_incoming, unsigned char *hna_buff, int hna_buff_len, char is_duplicate) { @@ -533,7 +533,7 @@ static int window_protected(struct bat_priv *bat_priv, */ static char count_real_packets(struct ethhdr *ethhdr, struct batman_packet *batman_packet, - struct batman_if *if_incoming) + struct hard_iface *if_incoming) { struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct orig_node *orig_node; @@ -598,10 +598,10 @@ out: void receive_bat_packet(struct ethhdr *ethhdr, struct batman_packet *batman_packet, unsigned char *hna_buff, int hna_buff_len, - struct batman_if *if_incoming) + struct hard_iface *if_incoming) { struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); - struct batman_if *batman_if; + struct hard_iface *hard_iface; struct orig_node *orig_neigh_node, *orig_node; char has_directlink_flag; char is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0; @@ -643,23 +643,23 @@ void receive_bat_packet(struct ethhdr *ethhdr, has_directlink_flag); rcu_read_lock(); - list_for_each_entry_rcu(batman_if, &hardif_list, list) { - if (batman_if->if_status != IF_ACTIVE) + list_for_each_entry_rcu(hard_iface, &hardif_list, list) { + if (hard_iface->if_status != IF_ACTIVE) continue; - if (batman_if->soft_iface != if_incoming->soft_iface) + if (hard_iface->soft_iface != if_incoming->soft_iface) continue; if (compare_eth(ethhdr->h_source, - batman_if->net_dev->dev_addr)) + hard_iface->net_dev->dev_addr)) is_my_addr = 1; if (compare_eth(batman_packet->orig, - batman_if->net_dev->dev_addr)) + hard_iface->net_dev->dev_addr)) is_my_orig = 1; if (compare_eth(batman_packet->prev_sender, - batman_if->net_dev->dev_addr)) + hard_iface->net_dev->dev_addr)) is_my_oldorig = 1; if (compare_eth(ethhdr->h_source, broadcast_addr)) @@ -828,7 +828,7 @@ out: orig_node_free_ref(orig_node); } -int 
recv_bat_packet(struct sk_buff *skb, struct batman_if *batman_if) +int recv_bat_packet(struct sk_buff *skb, struct hard_iface *hard_iface) { struct ethhdr *ethhdr; @@ -859,7 +859,7 @@ int recv_bat_packet(struct sk_buff *skb, struct batman_if *batman_if) receive_aggr_bat_packet(ethhdr, skb->data, skb_headlen(skb), - batman_if); + hard_iface); kfree_skb(skb); return NET_RX_SUCCESS; @@ -997,7 +997,7 @@ out: } -int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if) +int recv_icmp_packet(struct sk_buff *skb, struct hard_iface *recv_if) { struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); struct icmp_packet_rr *icmp_packet; @@ -1097,7 +1097,7 @@ out: * refcount.*/ struct neigh_node *find_router(struct bat_priv *bat_priv, struct orig_node *orig_node, - struct batman_if *recv_if) + struct hard_iface *recv_if) { struct orig_node *primary_orig_node; struct orig_node *router_orig; @@ -1263,7 +1263,7 @@ static int check_unicast_packet(struct sk_buff *skb, int hdr_size) return 0; } -int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, +int route_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if, int hdr_size) { struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); @@ -1349,7 +1349,7 @@ out: return ret; } -int recv_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if) +int recv_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if) { struct unicast_packet *unicast_packet; int hdr_size = sizeof(struct unicast_packet); @@ -1368,7 +1368,7 @@ int recv_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if) return route_unicast_packet(skb, recv_if, hdr_size); } -int recv_ucast_frag_packet(struct sk_buff *skb, struct batman_if *recv_if) +int recv_ucast_frag_packet(struct sk_buff *skb, struct hard_iface *recv_if) { struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); struct unicast_frag_packet *unicast_packet; @@ -1402,7 +1402,7 @@ int recv_ucast_frag_packet(struct sk_buff *skb, struct 
batman_if *recv_if) } -int recv_bcast_packet(struct sk_buff *skb, struct batman_if *recv_if) +int recv_bcast_packet(struct sk_buff *skb, struct hard_iface *recv_if) { struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); struct orig_node *orig_node = NULL; @@ -1487,7 +1487,7 @@ out: return ret; } -int recv_vis_packet(struct sk_buff *skb, struct batman_if *recv_if) +int recv_vis_packet(struct sk_buff *skb, struct hard_iface *recv_if) { struct vis_packet *vis_packet; struct ethhdr *ethhdr; diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index e2a9872..5efceaf 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -22,25 +22,25 @@ #ifndef _NET_BATMAN_ADV_ROUTING_H_ #define _NET_BATMAN_ADV_ROUTING_H_ -void slide_own_bcast_window(struct batman_if *batman_if); +void slide_own_bcast_window(struct hard_iface *hard_iface); void receive_bat_packet(struct ethhdr *ethhdr, struct batman_packet *batman_packet, unsigned char *hna_buff, int hna_buff_len, - struct batman_if *if_incoming); + struct hard_iface *if_incoming); void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node, struct neigh_node *neigh_node, unsigned char *hna_buff, int hna_buff_len); -int route_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if, +int route_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if, int hdr_size); -int recv_icmp_packet(struct sk_buff *skb, struct batman_if *recv_if); -int recv_unicast_packet(struct sk_buff *skb, struct batman_if *recv_if); -int recv_ucast_frag_packet(struct sk_buff *skb, struct batman_if *recv_if); -int recv_bcast_packet(struct sk_buff *skb, struct batman_if *recv_if); -int recv_vis_packet(struct sk_buff *skb, struct batman_if *recv_if); -int recv_bat_packet(struct sk_buff *skb, struct batman_if *recv_if); +int recv_icmp_packet(struct sk_buff *skb, struct hard_iface *recv_if); +int recv_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if); +int recv_ucast_frag_packet(struct 
sk_buff *skb, struct hard_iface *recv_if); +int recv_bcast_packet(struct sk_buff *skb, struct hard_iface *recv_if); +int recv_vis_packet(struct sk_buff *skb, struct hard_iface *recv_if); +int recv_bat_packet(struct sk_buff *skb, struct hard_iface *recv_if); struct neigh_node *find_router(struct bat_priv *bat_priv, struct orig_node *orig_node, - struct batman_if *recv_if); + struct hard_iface *recv_if); void bonding_candidate_del(struct orig_node *orig_node, struct neigh_node *neigh_node); diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index c4f3e49..d49e54d 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -56,20 +56,20 @@ static unsigned long forward_send_time(void) /* send out an already prepared packet to the given address via the * specified batman interface */ int send_skb_packet(struct sk_buff *skb, - struct batman_if *batman_if, + struct hard_iface *hard_iface, uint8_t *dst_addr) { struct ethhdr *ethhdr; - if (batman_if->if_status != IF_ACTIVE) + if (hard_iface->if_status != IF_ACTIVE) goto send_skb_err; - if (unlikely(!batman_if->net_dev)) + if (unlikely(!hard_iface->net_dev)) goto send_skb_err; - if (!(batman_if->net_dev->flags & IFF_UP)) { + if (!(hard_iface->net_dev->flags & IFF_UP)) { pr_warning("Interface %s is not up - can't send packet via " - "that interface!\n", batman_if->net_dev->name); + "that interface!\n", hard_iface->net_dev->name); goto send_skb_err; } @@ -80,7 +80,7 @@ int send_skb_packet(struct sk_buff *skb, skb_reset_mac_header(skb); ethhdr = (struct ethhdr *) skb_mac_header(skb); - memcpy(ethhdr->h_source, batman_if->net_dev->dev_addr, ETH_ALEN); + memcpy(ethhdr->h_source, hard_iface->net_dev->dev_addr, ETH_ALEN); memcpy(ethhdr->h_dest, dst_addr, ETH_ALEN); ethhdr->h_proto = __constant_htons(ETH_P_BATMAN); @@ -88,7 +88,7 @@ int send_skb_packet(struct sk_buff *skb, skb->priority = TC_PRIO_CONTROL; skb->protocol = __constant_htons(ETH_P_BATMAN); - skb->dev = batman_if->net_dev; + skb->dev = hard_iface->net_dev; 
/* dev_queue_xmit() returns a negative result on error. However on * congestion and traffic shaping, it drops and returns NET_XMIT_DROP @@ -102,16 +102,16 @@ send_skb_err: /* Send a packet to a given interface */ static void send_packet_to_if(struct forw_packet *forw_packet, - struct batman_if *batman_if) + struct hard_iface *hard_iface) { - struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); + struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface); char *fwd_str; uint8_t packet_num; int16_t buff_pos; struct batman_packet *batman_packet; struct sk_buff *skb; - if (batman_if->if_status != IF_ACTIVE) + if (hard_iface->if_status != IF_ACTIVE) return; packet_num = 0; @@ -126,7 +126,7 @@ static void send_packet_to_if(struct forw_packet *forw_packet, /* we might have aggregated direct link packets with an * ordinary base packet */ if ((forw_packet->direct_link_flags & (1 << packet_num)) && - (forw_packet->if_incoming == batman_if)) + (forw_packet->if_incoming == hard_iface)) batman_packet->flags |= DIRECTLINK; else batman_packet->flags &= ~DIRECTLINK; @@ -142,7 +142,8 @@ static void send_packet_to_if(struct forw_packet *forw_packet, batman_packet->tq, batman_packet->ttl, (batman_packet->flags & DIRECTLINK ? 
"on" : "off"), - batman_if->net_dev->name, batman_if->net_dev->dev_addr); + hard_iface->net_dev->name, + hard_iface->net_dev->dev_addr); buff_pos += sizeof(struct batman_packet) + (batman_packet->num_hna * ETH_ALEN); @@ -154,13 +155,13 @@ static void send_packet_to_if(struct forw_packet *forw_packet, /* create clone because function is called more than once */ skb = skb_clone(forw_packet->skb, GFP_ATOMIC); if (skb) - send_skb_packet(skb, batman_if, broadcast_addr); + send_skb_packet(skb, hard_iface, broadcast_addr); } /* send a batman packet */ static void send_packet(struct forw_packet *forw_packet) { - struct batman_if *batman_if; + struct hard_iface *hard_iface; struct net_device *soft_iface; struct bat_priv *bat_priv; struct batman_packet *batman_packet = @@ -204,17 +205,17 @@ static void send_packet(struct forw_packet *forw_packet) /* broadcast on every interface */ rcu_read_lock(); - list_for_each_entry_rcu(batman_if, &hardif_list, list) { - if (batman_if->soft_iface != soft_iface) + list_for_each_entry_rcu(hard_iface, &hardif_list, list) { + if (hard_iface->soft_iface != soft_iface) continue; - send_packet_to_if(forw_packet, batman_if); + send_packet_to_if(forw_packet, hard_iface); } rcu_read_unlock(); } static void rebuild_batman_packet(struct bat_priv *bat_priv, - struct batman_if *batman_if) + struct hard_iface *hard_iface) { int new_len; unsigned char *new_buff; @@ -226,7 +227,7 @@ static void rebuild_batman_packet(struct bat_priv *bat_priv, /* keep old buffer if kmalloc should fail */ if (new_buff) { - memcpy(new_buff, batman_if->packet_buff, + memcpy(new_buff, hard_iface->packet_buff, sizeof(struct batman_packet)); batman_packet = (struct batman_packet *)new_buff; @@ -234,21 +235,21 @@ static void rebuild_batman_packet(struct bat_priv *bat_priv, new_buff + sizeof(struct batman_packet), new_len - sizeof(struct batman_packet)); - kfree(batman_if->packet_buff); - batman_if->packet_buff = new_buff; - batman_if->packet_len = new_len; + 
kfree(hard_iface->packet_buff); + hard_iface->packet_buff = new_buff; + hard_iface->packet_len = new_len; } } -void schedule_own_packet(struct batman_if *batman_if) +void schedule_own_packet(struct hard_iface *hard_iface) { - struct bat_priv *bat_priv = netdev_priv(batman_if->soft_iface); + struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface); unsigned long send_time; struct batman_packet *batman_packet; int vis_server; - if ((batman_if->if_status == IF_NOT_IN_USE) || - (batman_if->if_status == IF_TO_BE_REMOVED)) + if ((hard_iface->if_status == IF_NOT_IN_USE) || + (hard_iface->if_status == IF_TO_BE_REMOVED)) return; vis_server = atomic_read(&bat_priv->vis_mode); @@ -260,51 +261,51 @@ void schedule_own_packet(struct batman_if *batman_if) * outdated packets (especially uninitialized mac addresses) in the * packet queue */ - if (batman_if->if_status == IF_TO_BE_ACTIVATED) - batman_if->if_status = IF_ACTIVE; + if (hard_iface->if_status == IF_TO_BE_ACTIVATED) + hard_iface->if_status = IF_ACTIVE; /* if local hna has changed and interface is a primary interface */ if ((atomic_read(&bat_priv->hna_local_changed)) && - (batman_if == bat_priv->primary_if)) - rebuild_batman_packet(bat_priv, batman_if); + (hard_iface == bat_priv->primary_if)) + rebuild_batman_packet(bat_priv, hard_iface); /** * NOTE: packet_buff might just have been re-allocated in * rebuild_batman_packet() */ - batman_packet = (struct batman_packet *)batman_if->packet_buff; + batman_packet = (struct batman_packet *)hard_iface->packet_buff; /* change sequence number to network order */ batman_packet->seqno = - htonl((uint32_t)atomic_read(&batman_if->seqno)); + htonl((uint32_t)atomic_read(&hard_iface->seqno)); if (vis_server == VIS_TYPE_SERVER_SYNC) batman_packet->flags |= VIS_SERVER; else batman_packet->flags &= ~VIS_SERVER; - if ((batman_if == bat_priv->primary_if) && + if ((hard_iface == bat_priv->primary_if) && (atomic_read(&bat_priv->gw_mode) == GW_MODE_SERVER)) batman_packet->gw_flags = 
(uint8_t)atomic_read(&bat_priv->gw_bandwidth); else batman_packet->gw_flags = 0; - atomic_inc(&batman_if->seqno); + atomic_inc(&hard_iface->seqno); - slide_own_bcast_window(batman_if); + slide_own_bcast_window(hard_iface); send_time = own_send_time(bat_priv); add_bat_packet_to_list(bat_priv, - batman_if->packet_buff, - batman_if->packet_len, - batman_if, 1, send_time); + hard_iface->packet_buff, + hard_iface->packet_len, + hard_iface, 1, send_time); } void schedule_forward_packet(struct orig_node *orig_node, struct ethhdr *ethhdr, struct batman_packet *batman_packet, uint8_t directlink, int hna_buff_len, - struct batman_if *if_incoming) + struct hard_iface *if_incoming) { struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface); unsigned char in_tq, in_ttl, tq_avg = 0; @@ -443,7 +444,7 @@ out: static void send_outstanding_bcast_packet(struct work_struct *work) { - struct batman_if *batman_if; + struct hard_iface *hard_iface; struct delayed_work *delayed_work = container_of(work, struct delayed_work, work); struct forw_packet *forw_packet = @@ -461,14 +462,14 @@ static void send_outstanding_bcast_packet(struct work_struct *work) /* rebroadcast packet */ rcu_read_lock(); - list_for_each_entry_rcu(batman_if, &hardif_list, list) { - if (batman_if->soft_iface != soft_iface) + list_for_each_entry_rcu(hard_iface, &hardif_list, list) { + if (hard_iface->soft_iface != soft_iface) continue; /* send a copy of the saved skb */ skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC); if (skb1) - send_skb_packet(skb1, batman_if, broadcast_addr); + send_skb_packet(skb1, hard_iface, broadcast_addr); } rcu_read_unlock(); @@ -521,15 +522,15 @@ out: } void purge_outstanding_packets(struct bat_priv *bat_priv, - struct batman_if *batman_if) + struct hard_iface *hard_iface) { struct forw_packet *forw_packet; struct hlist_node *tmp_node, *safe_tmp_node; - if (batman_if) + if (hard_iface) bat_dbg(DBG_BATMAN, bat_priv, "purge_outstanding_packets(): %s\n", - batman_if->net_dev->name); + 
hard_iface->net_dev->name); else bat_dbg(DBG_BATMAN, bat_priv, "purge_outstanding_packets()\n"); @@ -543,8 +544,8 @@ void purge_outstanding_packets(struct bat_priv *bat_priv, * if purge_outstanding_packets() was called with an argmument * we delete only packets belonging to the given interface */ - if ((batman_if) && - (forw_packet->if_incoming != batman_if)) + if ((hard_iface) && + (forw_packet->if_incoming != hard_iface)) continue; spin_unlock_bh(&bat_priv->forw_bcast_list_lock); @@ -567,8 +568,8 @@ void purge_outstanding_packets(struct bat_priv *bat_priv, * if purge_outstanding_packets() was called with an argmument * we delete only packets belonging to the given interface */ - if ((batman_if) && - (forw_packet->if_incoming != batman_if)) + if ((hard_iface) && + (forw_packet->if_incoming != hard_iface)) continue; spin_unlock_bh(&bat_priv->forw_bat_list_lock); diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index b68c272..7b2ff19 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -23,17 +23,17 @@ #define _NET_BATMAN_ADV_SEND_H_ int send_skb_packet(struct sk_buff *skb, - struct batman_if *batman_if, + struct hard_iface *hard_iface, uint8_t *dst_addr); -void schedule_own_packet(struct batman_if *batman_if); +void schedule_own_packet(struct hard_iface *hard_iface); void schedule_forward_packet(struct orig_node *orig_node, struct ethhdr *ethhdr, struct batman_packet *batman_packet, uint8_t directlink, int hna_buff_len, - struct batman_if *if_outgoing); + struct hard_iface *if_outgoing); int add_bcast_packet_to_list(struct bat_priv *bat_priv, struct sk_buff *skb); void send_outstanding_bat_packet(struct work_struct *work); void purge_outstanding_packets(struct bat_priv *bat_priv, - struct batman_if *batman_if); + struct hard_iface *hard_iface); #endif /* _NET_BATMAN_ADV_SEND_H_ */ diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index bea2dcf..95d1c3f 100644 --- a/net/batman-adv/soft-interface.c +++ 
b/net/batman-adv/soft-interface.c @@ -414,7 +414,7 @@ end: } void interface_rx(struct net_device *soft_iface, - struct sk_buff *skb, struct batman_if *recv_if, + struct sk_buff *skb, struct hard_iface *recv_if, int hdr_size) { struct bat_priv *bat_priv = netdev_priv(soft_iface); diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index e7b0e1a..80a3607 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -27,7 +27,7 @@ int softif_neigh_seq_print_text(struct seq_file *seq, void *offset); void softif_neigh_purge(struct bat_priv *bat_priv); int interface_tx(struct sk_buff *skb, struct net_device *soft_iface); void interface_rx(struct net_device *soft_iface, - struct sk_buff *skb, struct batman_if *recv_if, + struct sk_buff *skb, struct hard_iface *recv_if, int hdr_size); struct net_device *softif_create(char *name); void softif_destroy(struct net_device *soft_iface); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index a9bf186..83445cf 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -33,7 +33,7 @@ sizeof(struct bcast_packet)))) -struct batman_if { +struct hard_iface { struct list_head list; int16_t if_num; char if_status; @@ -124,7 +124,7 @@ struct neigh_node { atomic_t refcount; struct rcu_head rcu; struct orig_node *orig_node; - struct batman_if *if_incoming; + struct hard_iface *if_incoming; }; @@ -148,7 +148,7 @@ struct bat_priv { struct hlist_head softif_neigh_list; struct softif_neigh *softif_neigh; struct debug_log *debug_log; - struct batman_if *primary_if; + struct hard_iface *primary_if; struct kobject *mesh_obj; struct dentry *debug_dir; struct hlist_head forw_bat_list; @@ -217,7 +217,7 @@ struct forw_packet { uint32_t direct_link_flags; uint8_t num_packets; struct delayed_work delayed_work; - struct batman_if *if_incoming; + struct hard_iface *if_incoming; }; /* While scanning for vis-entries of a particular vis-originator diff --git a/net/batman-adv/unicast.c 
b/net/batman-adv/unicast.c index b411438..7238f04 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -213,7 +213,7 @@ out: } int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, - struct batman_if *batman_if, uint8_t dstaddr[]) + struct hard_iface *hard_iface, uint8_t dstaddr[]) { struct unicast_packet tmp_uc, *unicast_packet; struct sk_buff *frag_skb; @@ -258,12 +258,12 @@ int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, frag1->flags = UNI_FRAG_HEAD | large_tail; frag2->flags = large_tail; - seqno = atomic_add_return(2, &batman_if->frag_seqno); + seqno = atomic_add_return(2, &hard_iface->frag_seqno); frag1->seqno = htons(seqno - 1); frag2->seqno = htons(seqno); - send_skb_packet(skb, batman_if, dstaddr); - send_skb_packet(frag_skb, batman_if, dstaddr); + send_skb_packet(skb, hard_iface, dstaddr); + send_skb_packet(frag_skb, hard_iface, dstaddr); return NET_RX_SUCCESS; drop_frag: diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h index 8897308..16ad7a9 100644 --- a/net/batman-adv/unicast.h +++ b/net/batman-adv/unicast.h @@ -32,7 +32,7 @@ int frag_reassemble_skb(struct sk_buff *skb, struct bat_priv *bat_priv, void frag_list_free(struct list_head *head); int unicast_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv); int frag_send_skb(struct sk_buff *skb, struct bat_priv *bat_priv, - struct batman_if *batman_if, uint8_t dstaddr[]); + struct hard_iface *hard_iface, uint8_t dstaddr[]); static inline int frag_can_reassemble(struct sk_buff *skb, int mtu) { diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index e8911cb..3da499b 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -730,7 +730,7 @@ static void broadcast_vis_packet(struct bat_priv *bat_priv, struct orig_node *orig_node; struct vis_packet *packet; struct sk_buff *skb; - struct batman_if *batman_if; + struct hard_iface *hard_iface; uint8_t dstaddr[ETH_ALEN]; int i; @@ -755,12 +755,12 @@ static void 
broadcast_vis_packet(struct bat_priv *bat_priv, continue; memcpy(packet->target_orig, orig_node->orig, ETH_ALEN); - batman_if = orig_node->router->if_incoming; + hard_iface = orig_node->router->if_incoming; memcpy(dstaddr, orig_node->router->addr, ETH_ALEN); skb = skb_clone(info->skb_packet, GFP_ATOMIC); if (skb) - send_skb_packet(skb, batman_if, dstaddr); + send_skb_packet(skb, hard_iface, dstaddr); } rcu_read_unlock(); -- cgit v1.1 From 7cefb149a6b0e4f7c5adfa27dcf285b729063848 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Wed, 2 Mar 2011 17:39:31 +0000 Subject: batman-adv: Remove unused hdr_size variable in route_unicast_packet() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Linus Lüssing Signed-off-by: Marek Lindner --- net/batman-adv/routing.c | 7 +++---- net/batman-adv/routing.h | 3 +-- net/batman-adv/soft-interface.c | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 42cb6e2..c172f5d 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -1263,8 +1263,7 @@ static int check_unicast_packet(struct sk_buff *skb, int hdr_size) return 0; } -int route_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if, - int hdr_size) +int route_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if) { struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); struct orig_node *orig_node = NULL; @@ -1365,7 +1364,7 @@ int recv_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if) return NET_RX_SUCCESS; } - return route_unicast_packet(skb, recv_if, hdr_size); + return route_unicast_packet(skb, recv_if); } int recv_ucast_frag_packet(struct sk_buff *skb, struct hard_iface *recv_if) @@ -1398,7 +1397,7 @@ int recv_ucast_frag_packet(struct sk_buff *skb, struct hard_iface *recv_if) return NET_RX_SUCCESS; } - return route_unicast_packet(skb, recv_if, hdr_size); + 
return route_unicast_packet(skb, recv_if); } diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index 5efceaf..b5a064c 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -30,8 +30,7 @@ void receive_bat_packet(struct ethhdr *ethhdr, void update_routes(struct bat_priv *bat_priv, struct orig_node *orig_node, struct neigh_node *neigh_node, unsigned char *hna_buff, int hna_buff_len); -int route_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if, - int hdr_size); +int route_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if); int recv_icmp_packet(struct sk_buff *skb, struct hard_iface *recv_if); int recv_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if); int recv_ucast_frag_packet(struct sk_buff *skb, struct hard_iface *recv_if); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 95d1c3f..6b514ec 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -462,7 +462,7 @@ void interface_rx(struct net_device *soft_iface, memcpy(unicast_packet->dest, bat_priv->softif_neigh->addr, ETH_ALEN); - ret = route_unicast_packet(skb, recv_if, hdr_size); + ret = route_unicast_packet(skb, recv_if); if (ret == NET_RX_DROP) goto dropped; -- cgit v1.1 From e44d8fe2b5c27ecc230f886d4cc49fcbd86f87a0 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 4 Mar 2011 21:36:41 +0000 Subject: batman-adv: Disallow regular interface as mesh device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When trying to associate a net_device with another net_device which already exists, batman-adv assumes that this interface is a fully initialized batman mesh interface without checking it. The behaviour when accessing data behind netdev_priv of a random net_device is undefined and potentially dangerous. 
Reported-by: Linus Lüssing Signed-off-by: Marek Lindner --- net/batman-adv/hard-interface.c | 34 ++++++++++++++++++++++------------ net/batman-adv/soft-interface.c | 13 +++++++++++++ net/batman-adv/soft-interface.h | 1 + 3 files changed, 36 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 95a35b6..b3058e4 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -79,13 +79,8 @@ static int is_valid_iface(struct net_device *net_dev) return 0; /* no batman over batman */ -#ifdef HAVE_NET_DEVICE_OPS - if (net_dev->netdev_ops->ndo_start_xmit == interface_tx) + if (softif_is_valid(net_dev)) return 0; -#else - if (net_dev->hard_start_xmit == interface_tx) - return 0; -#endif /* Device is being bridged */ /* if (net_dev->priv_flags & IFF_BRIDGE_PORT) @@ -282,6 +277,8 @@ int hardif_enable_interface(struct hard_iface *hard_iface, char *iface_name) { struct bat_priv *bat_priv; struct batman_packet *batman_packet; + struct net_device *soft_iface; + int ret; if (hard_iface->if_status != IF_NOT_IN_USE) goto out; @@ -289,18 +286,30 @@ int hardif_enable_interface(struct hard_iface *hard_iface, char *iface_name) if (!atomic_inc_not_zero(&hard_iface->refcount)) goto out; - hard_iface->soft_iface = dev_get_by_name(&init_net, iface_name); + soft_iface = dev_get_by_name(&init_net, iface_name); - if (!hard_iface->soft_iface) { - hard_iface->soft_iface = softif_create(iface_name); + if (!soft_iface) { + soft_iface = softif_create(iface_name); - if (!hard_iface->soft_iface) + if (!soft_iface) { + ret = -ENOMEM; goto err; + } /* dev_get_by_name() increases the reference counter for us */ - dev_hold(hard_iface->soft_iface); + dev_hold(soft_iface); + } + + if (!softif_is_valid(soft_iface)) { + pr_err("Can't create batman mesh interface %s: " + "already exists as regular interface\n", + soft_iface->name); + dev_put(soft_iface); + ret = -EINVAL; + goto err; } + 
hard_iface->soft_iface = soft_iface; bat_priv = netdev_priv(hard_iface->soft_iface); hard_iface->packet_len = BAT_PACKET_LEN; hard_iface->packet_buff = kmalloc(hard_iface->packet_len, GFP_ATOMIC); @@ -308,6 +317,7 @@ int hardif_enable_interface(struct hard_iface *hard_iface, char *iface_name) if (!hard_iface->packet_buff) { bat_err(hard_iface->soft_iface, "Can't add interface packet " "(%s): out of memory\n", hard_iface->net_dev->name); + ret = -ENOMEM; goto err; } @@ -370,7 +380,7 @@ out: err: hardif_free_ref(hard_iface); - return -ENOMEM; + return ret; } void hardif_disable_interface(struct hard_iface *hard_iface) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 6b514ec..9ed2614 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -622,6 +622,19 @@ void softif_destroy(struct net_device *soft_iface) unregister_netdevice(soft_iface); } +int softif_is_valid(struct net_device *net_dev) +{ +#ifdef HAVE_NET_DEVICE_OPS + if (net_dev->netdev_ops->ndo_start_xmit == interface_tx) + return 1; +#else + if (net_dev->hard_start_xmit == interface_tx) + return 1; +#endif + + return 0; +} + /* ethtool */ static int bat_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index 80a3607..4789b6f 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -31,5 +31,6 @@ void interface_rx(struct net_device *soft_iface, int hdr_size); struct net_device *softif_create(char *name); void softif_destroy(struct net_device *soft_iface); +int softif_is_valid(struct net_device *net_dev); #endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */ -- cgit v1.1 From 2d0123a5d635e336dbab21eba62e8dd4eb3e39a0 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Fri, 4 Mar 2011 15:14:16 +0800 Subject: mac80211: remove unused macros Compile test only. Signed-off-by: Shan Wei Signed-off-by: John W. 
Linville --- net/mac80211/key.h | 1 - net/mac80211/rc80211_pid.h | 3 --- net/mac80211/work.c | 1 - 3 files changed, 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/key.h b/net/mac80211/key.h index 8106aa1..4ddbe27 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -21,7 +21,6 @@ #define WEP_IV_LEN 4 #define WEP_ICV_LEN 4 -#define ALG_TKIP_KEY_LEN 32 #define ALG_CCMP_KEY_LEN 16 #define CCMP_HDR_LEN 8 #define CCMP_MIC_LEN 8 diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h index 1a873f0..6510f8e 100644 --- a/net/mac80211/rc80211_pid.h +++ b/net/mac80211/rc80211_pid.h @@ -24,9 +24,6 @@ /* Fixed point arithmetic shifting amount. */ #define RC_PID_ARITH_SHIFT 8 -/* Fixed point arithmetic factor. */ -#define RC_PID_ARITH_FACTOR (1 << RC_PID_ARITH_SHIFT) - /* Proportional PID component coefficient. */ #define RC_PID_COEFF_P 15 /* Integral PID component coefficient. */ diff --git a/net/mac80211/work.c b/net/mac80211/work.c index 204f0a4..e73c8ca 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -30,7 +30,6 @@ #define IEEE80211_AUTH_MAX_TRIES 3 #define IEEE80211_ASSOC_TIMEOUT (HZ / 5) #define IEEE80211_ASSOC_MAX_TRIES 3 -#define IEEE80211_MAX_PROBE_TRIES 5 enum work_action { WORK_ACT_MISMATCH, -- cgit v1.1 From b196d031f2bb29c253050d554130e41c7e3cbfb0 Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Mon, 7 Mar 2011 13:09:12 +0100 Subject: mac80211: Add log message to ieee80211_restart_hw() Add a log message to ieee80211_restart_hw() to highlight that special codepath in the logs. This helps debugging bugs in the rarely tested restart code. Signed-off-by: Michael Buesch Signed-off-by: John W. 
Linville --- net/mac80211/main.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 2543e48..562d298 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -380,6 +380,9 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw) trace_api_restart_hw(local); + wiphy_info(hw->wiphy, + "Hardware restart was requested\n"); + /* use this reason, ieee80211_reconfig will unblock it */ ieee80211_stop_queues_by_reason(hw, IEEE80211_QUEUE_STOP_REASON_SUSPEND); -- cgit v1.1 From d07bfd8b6f20a81d7ec65c50f35b053d9e3aa740 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 7 Mar 2011 15:48:41 +0100 Subject: mac80211: fix scan race, simplify code The scan code has a race that Michael reported he ran into, but it's easy to fix while at the same time simplifying the code. The race resulted in the following warning: ------------[ cut here ]------------ WARNING: at net/mac80211/scan.c:310 ieee80211_rx_bss_free+0x20c/0x4b8 [mac80211]() Modules linked in: [...] [] (unwind_backtrace+0x0/0xe0) from [] (warn_slowpath_common+0x4c/0x64) [... backtrace wasn't useful ...] Reported-by: Michael Buesch Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/scan.c | 64 ++++++++++++++++++++--------------------------------- 1 file changed, 24 insertions(+), 40 deletions(-) (limited to 'net') diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 8429545..489b6ad 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -258,10 +258,12 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) return true; } -static bool __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, +static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, bool was_hw_scan) { struct ieee80211_local *local = hw_to_local(hw); + bool on_oper_chan; + bool enable_beacons = false; lockdep_assert_held(&local->mtx); @@ -275,12 +277,12 @@ static bool __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, aborted = true; if (WARN_ON(!local->scan_req)) - return false; + return; if (was_hw_scan && !aborted && ieee80211_prep_hw_scan(local)) { int rc = drv_hw_scan(local, local->scan_sdata, local->hw_scan_req); if (rc == 0) - return false; + return; } kfree(local->hw_scan_req); @@ -294,26 +296,11 @@ static bool __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, local->scanning = 0; local->scan_channel = NULL; - return true; -} - -static void __ieee80211_scan_completed_finish(struct ieee80211_hw *hw, - bool was_hw_scan) -{ - struct ieee80211_local *local = hw_to_local(hw); - bool on_oper_chan; - bool enable_beacons = false; - - mutex_lock(&local->mtx); on_oper_chan = ieee80211_cfg_on_oper_channel(local); - WARN_ON(local->scanning & (SCAN_SW_SCANNING | SCAN_HW_SCANNING)); - - if (was_hw_scan || !on_oper_chan) { - if (WARN_ON(local->scan_channel)) - local->scan_channel = NULL; + if (was_hw_scan || !on_oper_chan) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); - } else + else /* Set power back to normal operating levels. 
*/ ieee80211_hw_config(local, 0); @@ -331,7 +318,6 @@ static void __ieee80211_scan_completed_finish(struct ieee80211_hw *hw, } ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); ieee80211_mlme_notify_scan_completed(local); ieee80211_ibss_notify_scan_completed(local); @@ -686,12 +672,14 @@ void ieee80211_scan_work(struct work_struct *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, scan_work.work); - struct ieee80211_sub_if_data *sdata = local->scan_sdata; + struct ieee80211_sub_if_data *sdata; unsigned long next_delay = 0; - bool aborted, hw_scan, finish; + bool aborted, hw_scan; mutex_lock(&local->mtx); + sdata = local->scan_sdata; + if (test_and_clear_bit(SCAN_COMPLETED, &local->scanning)) { aborted = test_and_clear_bit(SCAN_ABORTED, &local->scanning); goto out_complete; @@ -755,17 +743,11 @@ void ieee80211_scan_work(struct work_struct *work) } while (next_delay == 0); ieee80211_queue_delayed_work(&local->hw, &local->scan_work, next_delay); - mutex_unlock(&local->mtx); - return; + goto out; out_complete: hw_scan = test_bit(SCAN_HW_SCANNING, &local->scanning); - finish = __ieee80211_scan_completed(&local->hw, aborted, hw_scan); - mutex_unlock(&local->mtx); - if (finish) - __ieee80211_scan_completed_finish(&local->hw, hw_scan); - return; - + __ieee80211_scan_completed(&local->hw, aborted, hw_scan); out: mutex_unlock(&local->mtx); } @@ -835,7 +817,6 @@ int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata, void ieee80211_scan_cancel(struct ieee80211_local *local) { bool abortscan; - bool finish = false; /* * We are only canceling software scan, or deferred scan that was not @@ -855,14 +836,17 @@ void ieee80211_scan_cancel(struct ieee80211_local *local) mutex_lock(&local->mtx); abortscan = local->scan_req && !test_bit(SCAN_HW_SCANNING, &local->scanning); - if (abortscan) - finish = __ieee80211_scan_completed(&local->hw, true, false); - mutex_unlock(&local->mtx); - if (abortscan) { - /* The scan is canceled, 
but stop work from being pending */ - cancel_delayed_work_sync(&local->scan_work); + /* + * The scan is canceled, but stop work from being pending. + * + * If the work is currently running, it must be blocked on + * the mutex, but we'll set scan_sdata = NULL and it'll + * simply exit once it acquires the mutex. + */ + cancel_delayed_work(&local->scan_work); + /* and clean up */ + __ieee80211_scan_completed(&local->hw, true, false); } - if (finish) - __ieee80211_scan_completed_finish(&local->hw, false); + mutex_unlock(&local->mtx); } -- cgit v1.1 From 4c8237cd76a0510677dc2e3dd0f8866ec8e0b1e5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 7 Mar 2011 14:27:38 -0800 Subject: ipv4: Validate route entry type at insert instead of every lookup. fib_semantic_match() requires that if the type doesn't signal an automatic error, it must be of type RTN_UNICAST, RTN_LOCAL, RTN_BROADCAST, RTN_ANYCAST, or RTN_MULTICAST. Checking this every route lookup is pointless work. Instead validate it during route insertion, via fib_create_info(). Also, there was nothing making sure the type value was less than RTN_MAX, so add that missing check while we're here. Signed-off-by: David S. 
Miller --- net/ipv4/fib_semantics.c | 54 +++++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 562f34c..c29291b 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -707,6 +707,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg) int nhs = 1; struct net *net = cfg->fc_nlinfo.nl_net; + if (cfg->fc_type > RTN_MAX) + goto err_inval; + /* Fast check to catch the most weird cases */ if (fib_props[cfg->fc_type].scope > cfg->fc_scope) goto err_inval; @@ -812,6 +815,17 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) goto err_inval; goto link_it; + } else { + switch (cfg->fc_type) { + case RTN_UNICAST: + case RTN_LOCAL: + case RTN_BROADCAST: + case RTN_ANYCAST: + case RTN_MULTICAST: + break; + default: + goto err_inval; + } } if (cfg->fc_scope > RT_SCOPE_HOST) @@ -915,35 +929,23 @@ int fib_semantic_match(struct fib_table *tb, struct list_head *head, if (fi->fib_flags & RTNH_F_DEAD) continue; - switch (fa->fa_type) { - case RTN_UNICAST: - case RTN_LOCAL: - case RTN_BROADCAST: - case RTN_ANYCAST: - case RTN_MULTICAST: - for_nexthops(fi) { - if (nh->nh_flags & RTNH_F_DEAD) - continue; - if (!flp->oif || flp->oif == nh->nh_oif) - break; - } + for_nexthops(fi) { + if (nh->nh_flags & RTNH_F_DEAD) + continue; + if (!flp->oif || flp->oif == nh->nh_oif) + break; + } #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (nhsel < fi->fib_nhs) { - nh_sel = nhsel; - goto out_fill_res; - } + if (nhsel < fi->fib_nhs) { + nh_sel = nhsel; + goto out_fill_res; + } #else - if (nhsel < 1) - goto out_fill_res; + if (nhsel < 1) + goto out_fill_res; #endif - endfor_nexthops(fi); - continue; - - default: - pr_warning("fib_semantic_match bad type %#x\n", - fa->fa_type); - return -EINVAL; - } + endfor_nexthops(fi); + continue; } return err; } -- cgit v1.1 From 
3be0686b6e2f953afe83626e871b4a7b0ceae49b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 7 Mar 2011 15:01:10 -0800 Subject: ipv4: Inline fib_semantic_match into check_leaf This eliminates a lot of pure overhead due to parameter passing. Signed-off-by: David S. Miller --- net/ipv4/fib_lookup.h | 7 +++++ net/ipv4/fib_semantics.c | 68 +----------------------------------------------- net/ipv4/fib_trie.c | 51 ++++++++++++++++++++++++++++++------ 3 files changed, 51 insertions(+), 75 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index d5c40d8..84db2da 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -51,4 +51,11 @@ static inline void fib_result_assign(struct fib_result *res, res->fi = fi; } +struct fib_prop { + int error; + u8 scope; +}; + +extern const struct fib_prop fib_props[RTN_MAX + 1]; + #endif /* _FIB_LOOKUP_H */ diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c29291b..6349a21 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -90,11 +90,7 @@ static DEFINE_SPINLOCK(fib_multipath_lock); #define endfor_nexthops(fi) } -static const struct -{ - int error; - u8 scope; -} fib_props[RTN_MAX + 1] = { +const struct fib_prop fib_props[RTN_MAX + 1] = { [RTN_UNSPEC] = { .error = 0, .scope = RT_SCOPE_NOWHERE, @@ -902,68 +898,6 @@ failure: return ERR_PTR(err); } -/* Note! fib_semantic_match intentionally uses RCU list functions. 
*/ -int fib_semantic_match(struct fib_table *tb, struct list_head *head, - const struct flowi *flp, struct fib_result *res, - int prefixlen, int fib_flags) -{ - struct fib_alias *fa; - int nh_sel = 0; - - list_for_each_entry_rcu(fa, head, fa_list) { - int err; - - if (fa->fa_tos && - fa->fa_tos != flp->fl4_tos) - continue; - - if (fa->fa_scope < flp->fl4_scope) - continue; - - fib_alias_accessed(fa); - - err = fib_props[fa->fa_type].error; - if (err == 0) { - struct fib_info *fi = fa->fa_info; - - if (fi->fib_flags & RTNH_F_DEAD) - continue; - - for_nexthops(fi) { - if (nh->nh_flags & RTNH_F_DEAD) - continue; - if (!flp->oif || flp->oif == nh->nh_oif) - break; - } -#ifdef CONFIG_IP_ROUTE_MULTIPATH - if (nhsel < fi->fib_nhs) { - nh_sel = nhsel; - goto out_fill_res; - } -#else - if (nhsel < 1) - goto out_fill_res; -#endif - endfor_nexthops(fi); - continue; - } - return err; - } - return 1; - -out_fill_res: - res->prefixlen = prefixlen; - res->nh_sel = nh_sel; - res->type = fa->fa_type; - res->scope = fa->fa_scope; - res->fi = fa->fa_info; - res->table = tb; - res->fa_head = head; - if (!(fib_flags & FIB_LOOKUP_NOREF)) - atomic_inc(&res->fi->fib_clntref); - return 0; -} - /* Find appropriate source address to this destination */ __be32 __fib_res_prefsrc(struct fib_result *res) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index edf3b09..a4109a5 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1349,23 +1349,58 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, struct hlist_node *node; hlist_for_each_entry_rcu(li, node, hhead, hlist) { - int err; + struct fib_alias *fa; int plen = li->plen; __be32 mask = inet_make_mask(plen); if (l->key != (key & ntohl(mask))) continue; - err = fib_semantic_match(tb, &li->falh, flp, res, plen, fib_flags); + list_for_each_entry_rcu(fa, &li->falh, fa_list) { + struct fib_info *fi = fa->fa_info; + int nhsel, err; + if (fa->fa_tos && fa->fa_tos != flp->fl4_tos) + continue; + if 
(fa->fa_scope < flp->fl4_scope) + continue; + fib_alias_accessed(fa); + err = fib_props[fa->fa_type].error; + if (err) { #ifdef CONFIG_IP_FIB_TRIE_STATS - if (err <= 0) - t->stats.semantic_match_passed++; - else - t->stats.semantic_match_miss++; + t->stats.semantic_match_miss++; +#endif + return 1; + } + if (fi->fib_flags & RTNH_F_DEAD) + continue; + for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { + const struct fib_nh *nh = &fi->fib_nh[nhsel]; + + if (nh->nh_flags & RTNH_F_DEAD) + continue; + if (flp->oif && flp->oif != nh->nh_oif) + continue; + +#ifdef CONFIG_IP_FIB_TRIE_STATS + t->stats.semantic_match_passed++; +#endif + res->prefixlen = plen; + res->nh_sel = nhsel; + res->type = fa->fa_type; + res->scope = fa->fa_scope; + res->fi = fi; + res->table = tb; + res->fa_head = &li->falh; + if (!(fib_flags & FIB_LOOKUP_NOREF)) + atomic_inc(&res->fi->fib_clntref); + return 0; + } + } + +#ifdef CONFIG_IP_FIB_TRIE_STATS + t->stats.semantic_match_miss++; #endif - if (err <= 0) - return err; } return 1; -- cgit v1.1 From e3f48d37cf87a4a94e9f05fddc39b0e5f2307c27 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 28 Feb 2011 20:26:31 +0000 Subject: net: allow handlers to be processed for orig_dev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was there before, I forgot about this. Allows deliveries to ptype_base handlers registered for orig_dev. I presume this is still desired. Signed-off-by: Jiri Pirko Reviewed-by: Nicolas de Pesloüan Signed-off-by: David S. 
Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 30440e7..9f66de9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3208,7 +3208,8 @@ ncls: list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { if (ptype->type == type && - (ptype->dev == null_or_dev || ptype->dev == skb->dev)) { + (ptype->dev == null_or_dev || ptype->dev == skb->dev || + ptype->dev == orig_dev)) { if (pt_prev) ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; -- cgit v1.1 From 4b66fef9b591b95f447aea12242a1133deb0bd22 Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Fri, 4 Mar 2011 11:45:03 +0000 Subject: mcast: net_device dev not used ip6_mc_source(), ip6_mc_msfilter() as well as ip6_mc_msfget() declare and assign dev but do not use the variable afterwards. Signed-off-by: Hagen Paul Pfeifer Signed-off-by: David S. Miller --- net/ipv6/mcast.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 7b27d08..f2c9b69 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -319,7 +319,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk, { struct in6_addr *source, *group; struct ipv6_mc_socklist *pmc; - struct net_device *dev; struct inet6_dev *idev; struct ipv6_pinfo *inet6 = inet6_sk(sk); struct ip6_sf_socklist *psl; @@ -341,7 +340,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk, rcu_read_unlock(); return -ENODEV; } - dev = idev->dev; err = -EADDRNOTAVAIL; @@ -455,7 +453,6 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) { struct in6_addr *group; struct ipv6_mc_socklist *pmc; - struct net_device *dev; struct inet6_dev *idev; struct ipv6_pinfo *inet6 = inet6_sk(sk); struct ip6_sf_socklist *newpsl, *psl; @@ -478,7 +475,6 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) rcu_read_unlock(); return -ENODEV; } - dev = idev->dev; err = 0; @@ -549,7 
+545,6 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, struct in6_addr *group; struct ipv6_mc_socklist *pmc; struct inet6_dev *idev; - struct net_device *dev; struct ipv6_pinfo *inet6 = inet6_sk(sk); struct ip6_sf_socklist *psl; struct net *net = sock_net(sk); @@ -566,7 +561,6 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, rcu_read_unlock(); return -ENODEV; } - dev = idev->dev; err = -EADDRNOTAVAIL; /* -- cgit v1.1 From e143038f4dda10a51985b9a3f9fb07d73a5eadfa Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Fri, 4 Mar 2011 11:45:04 +0000 Subject: af_packet: struct socket declared/assigned but unused Signed-off-by: Hagen Paul Pfeifer Signed-off-by: David S. Miller --- net/packet/af_packet.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 5efef5b..b5362e9 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -966,7 +966,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { - struct socket *sock; struct sk_buff *skb; struct net_device *dev; __be16 proto; @@ -978,8 +977,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) int len_sum = 0; int status = 0; - sock = po->sk.sk_socket; - mutex_lock(&po->pg_vec_lock); err = -EBUSY; -- cgit v1.1 From efea2c6b2efc1716b2c0cf257cc428d6cd3ed6e2 Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Fri, 4 Mar 2011 11:45:05 +0000 Subject: sctp: several declared/set but unused fixes Signed-off-by: Hagen Paul Pfeifer Signed-off-by: David S. 
Miller --- net/sctp/associola.c | 2 -- net/sctp/input.c | 3 --- net/sctp/outqueue.c | 2 -- net/sctp/sm_make_chunk.c | 3 --- net/sctp/socket.c | 2 -- net/sctp/ulpqueue.c | 7 +------ 6 files changed, 1 insertion(+), 18 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 5f1fb8b..6b04287 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1089,7 +1089,6 @@ static void sctp_assoc_bh_rcv(struct work_struct *work) base.inqueue.immediate); struct sctp_endpoint *ep; struct sctp_chunk *chunk; - struct sock *sk; struct sctp_inq *inqueue; int state; sctp_subtype_t subtype; @@ -1097,7 +1096,6 @@ static void sctp_assoc_bh_rcv(struct work_struct *work) /* The association should be held so we should be safe. */ ep = asoc->ep; - sk = asoc->base.sk; inqueue = &asoc->base.inqueue; sctp_association_hold(asoc); diff --git a/net/sctp/input.c b/net/sctp/input.c index ea21924..826661b 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -948,14 +948,11 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb, union sctp_addr addr; union sctp_addr *paddr = &addr; struct sctphdr *sh = sctp_hdr(skb); - sctp_chunkhdr_t *ch; union sctp_params params; sctp_init_chunk_t *init; struct sctp_transport *transport; struct sctp_af *af; - ch = (sctp_chunkhdr_t *) skb->data; - /* * This code will NOT touch anything inside the chunk--it is * strictly READ-ONLY. 
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 8c6d379..26dc005 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -545,13 +545,11 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, struct sctp_transport *transport = pkt->transport; sctp_xmit_t status; struct sctp_chunk *chunk, *chunk1; - struct sctp_association *asoc; int fast_rtx; int error = 0; int timer = 0; int done = 0; - asoc = q->asoc; lqueue = &q->retransmit; fast_rtx = q->fast_rtx; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index b23428f..de98665 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3375,7 +3375,6 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc, struct sctp_fwdtsn_skip *skiplist) { struct sctp_chunk *retval = NULL; - struct sctp_fwdtsn_chunk *ftsn_chunk; struct sctp_fwdtsn_hdr ftsn_hdr; struct sctp_fwdtsn_skip skip; size_t hint; @@ -3388,8 +3387,6 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc, if (!retval) return NULL; - ftsn_chunk = (struct sctp_fwdtsn_chunk *)retval->subh.fwdtsn_hdr; - ftsn_hdr.new_cum_tsn = htonl(new_cum_tsn); retval->subh.fwdtsn_hdr = sctp_addto_chunk(retval, sizeof(ftsn_hdr), &ftsn_hdr); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b53b2eb..3951a10 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2928,7 +2928,6 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva unsigned int optlen) { struct sctp_sock *sp; - struct sctp_endpoint *ep; struct sctp_association *asoc = NULL; struct sctp_setpeerprim prim; struct sctp_chunk *chunk; @@ -2936,7 +2935,6 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva int err; sp = sctp_sk(sk); - ep = sp->ep; if (!sctp_addip_enable) return -EPERM; diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index c7f7e49..1767818 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -105,11 +105,8 @@ int 
sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, gfp_t gfp) { struct sk_buff_head temp; - sctp_data_chunk_t *hdr; struct sctp_ulpevent *event; - hdr = (sctp_data_chunk_t *) chunk->chunk_hdr; - /* Create an event from the incoming chunk. */ event = sctp_ulpevent_make_rcvmsg(chunk->asoc, chunk, gfp); if (!event) @@ -743,11 +740,9 @@ static void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq, struct sk_buff *pos, *tmp; struct sctp_ulpevent *cevent; struct sctp_stream *in; - __u16 sid, csid; - __u16 ssn, cssn; + __u16 sid, csid, cssn; sid = event->stream; - ssn = event->ssn; in = &ulpq->asoc->ssnmap->in; event_list = (struct sk_buff_head *) sctp_event2skb(event)->prev; -- cgit v1.1 From 6118e35a7126c1062b1a0f6737b84b4fe4d5c8d4 Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Fri, 4 Mar 2011 11:45:06 +0000 Subject: af_unix: remove unused struct sockaddr_un cruft Signed-off-by: Hagen Paul Pfeifer Signed-off-by: David S. Miller --- net/unix/af_unix.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 217fb7f..df5997d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1567,7 +1567,6 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; struct sock *other = NULL; - struct sockaddr_un *sunaddr = msg->msg_name; int err, size; struct sk_buff *skb; int sent = 0; @@ -1590,7 +1589,6 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; goto out_err; } else { - sunaddr = NULL; err = -ENOTCONN; other = unix_peer(sk); if (!other) -- cgit v1.1 From 4ea09c9caaebc98d06a39c435d4359912cfbb5e2 Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Tue, 1 Feb 2011 07:22:11 +0000 Subject: vlan: add support to ndo_fcoe_ddp_target() Add the new target ddp offload support ndo_fcoe_ddp_target(). 
Signed-off-by: Yi Zou Signed-off-by: Kiran Patil Signed-off-by: Jeff Kirsher --- net/8021q/vlan_dev.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index be73753..ae610f0 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -625,6 +625,19 @@ static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type) rc = ops->ndo_fcoe_get_wwn(real_dev, wwn, type); return rc; } + +static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid, + struct scatterlist *sgl, unsigned int sgc) +{ + struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + const struct net_device_ops *ops = real_dev->netdev_ops; + int rc = 0; + + if (ops->ndo_fcoe_ddp_target) + rc = ops->ndo_fcoe_ddp_target(real_dev, xid, sgl, sgc); + + return rc; +} #endif static void vlan_dev_change_rx_flags(struct net_device *dev, int change) @@ -858,6 +871,7 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_fcoe_enable = vlan_dev_fcoe_enable, .ndo_fcoe_disable = vlan_dev_fcoe_disable, .ndo_fcoe_get_wwn = vlan_dev_fcoe_get_wwn, + .ndo_fcoe_ddp_target = vlan_dev_fcoe_ddp_target, #endif }; -- cgit v1.1 From 1fc050a13473348f5c439de2bb41c8e92dba5588 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 7 Mar 2011 20:54:48 -0800 Subject: ipv4: Cache source address in nexthop entries. When doing output route lookups, we have to select the source address if the user has not specified an explicit one. First, if the route has an explicit preferred source address specified, then we use that. Otherwise we search the route's outgoing interface for a suitable address. This search can be precomputed and cached at route insertion time. The only missing part is that we have to refresh this precomputed value any time addresses are added or removed from the interface, and this is accomplished by fib_update_nh_saddrs(). Signed-off-by: David S. 
Miller --- net/ipv4/fib_frontend.c | 2 ++ net/ipv4/fib_semantics.c | 31 ++++++++++++++++++++++++------- 2 files changed, 26 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index ad0778a..1d2233c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -890,10 +890,12 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, #ifdef CONFIG_IP_ROUTE_MULTIPATH fib_sync_up(dev); #endif + fib_update_nh_saddrs(dev); rt_cache_flush(dev_net(dev), -1); break; case NETDEV_DOWN: fib_del_ifaddr(ifa); + fib_update_nh_saddrs(dev); if (ifa->ifa_dev->ifa_list == NULL) { /* Last address was deleted from this interface. * Disable IP. diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 6349a21..952c737 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -853,6 +853,12 @@ struct fib_info *fib_create_info(struct fib_config *cfg) goto err_inval; } + change_nexthops(fi) { + nexthop_nh->nh_saddr = inet_select_addr(nexthop_nh->nh_dev, + nexthop_nh->nh_gw, + nexthop_nh->nh_scope); + } endfor_nexthops(fi) + link_it: ofi = fib_find_info(fi); if (ofi) { @@ -898,13 +904,6 @@ failure: return ERR_PTR(err); } -/* Find appropriate source address to this destination */ - -__be32 __fib_res_prefsrc(struct fib_result *res) -{ - return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); -} - int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int flags) @@ -1128,6 +1127,24 @@ out: return; } +void fib_update_nh_saddrs(struct net_device *dev) +{ + struct hlist_head *head; + struct hlist_node *node; + struct fib_nh *nh; + unsigned int hash; + + hash = fib_devindex_hashfn(dev->ifindex); + head = &fib_info_devhash[hash]; + hlist_for_each_entry(nh, node, head, nh_hash) { + if (nh->nh_dev != dev) + continue; + nh->nh_saddr = 
inet_select_addr(nh->nh_dev, + nh->nh_gw, + nh->nh_scope); + } +} + #ifdef CONFIG_IP_ROUTE_MULTIPATH /* -- cgit v1.1 From 9846ada138accc63994b57ebdfa76e3e137729e2 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Tue, 8 Mar 2011 15:37:27 +0100 Subject: netfilter: ipset: fix the compile warning in ip_set_create MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/netfilter/ipset/ip_set_core.c:615: warning: ‘clash’ may be used uninitialized in this function Signed-off-by: Shan Wei Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/ipset/ip_set_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 8b1a54c..618a615 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -612,7 +612,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { - struct ip_set *set, *clash; + struct ip_set *set, *clash = NULL; ip_set_id_t index = IPSET_INVALID_ID; struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {}; const char *name, *typename; -- cgit v1.1 From a7ac8fc1d8d26c975c460a69aa7b9d5b5d5d29b0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Mar 2011 11:03:21 -0800 Subject: ipv4: Fix scope value used in route src-address caching. We have to use cfg->fc_scope not the final nh_scope value. Reported-by: Julian Anastasov Signed-off-by: David S. 
Miller --- net/ipv4/fib_semantics.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 952c737..d73d758 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -854,9 +854,10 @@ struct fib_info *fib_create_info(struct fib_config *cfg) } change_nexthops(fi) { + nexthop_nh->nh_cfg_scope = cfg->fc_scope; nexthop_nh->nh_saddr = inet_select_addr(nexthop_nh->nh_dev, nexthop_nh->nh_gw, - nexthop_nh->nh_scope); + nexthop_nh->nh_cfg_scope); } endfor_nexthops(fi) link_it: @@ -1141,7 +1142,7 @@ void fib_update_nh_saddrs(struct net_device *dev) continue; nh->nh_saddr = inet_select_addr(nh->nh_dev, nh->nh_gw, - nh->nh_scope); + nh->nh_cfg_scope); } } -- cgit v1.1 From fdb838cdae4d4f2a478e5cfdd84026f7960b69dd Mon Sep 17 00:00:00 2001 From: Peter Korsgaard Date: Mon, 7 Mar 2011 05:49:47 +0000 Subject: dsa/mv88e6060: support nonzero mii base address The mv88e6060 uses either the lower 16 or upper 16 mii addresses, depending on the value of the EE_CLK/ADDR4 pin. Support both configurations by using the sw_addr setting as base address. Signed-off-by: Peter Korsgaard Acked-by: Lennert Buytenhek Signed-off-by: David S. 
Miller --- net/dsa/mv88e6060.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/dsa/mv88e6060.c b/net/dsa/mv88e6060.c index 83277f4..8f4ff5a 100644 --- a/net/dsa/mv88e6060.c +++ b/net/dsa/mv88e6060.c @@ -18,7 +18,7 @@ static int reg_read(struct dsa_switch *ds, int addr, int reg) { - return mdiobus_read(ds->master_mii_bus, addr, reg); + return mdiobus_read(ds->master_mii_bus, ds->pd->sw_addr + addr, reg); } #define REG_READ(addr, reg) \ @@ -34,7 +34,8 @@ static int reg_read(struct dsa_switch *ds, int addr, int reg) static int reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) { - return mdiobus_write(ds->master_mii_bus, addr, reg, val); + return mdiobus_write(ds->master_mii_bus, ds->pd->sw_addr + addr, + reg, val); } #define REG_WRITE(addr, reg, val) \ @@ -50,7 +51,7 @@ static char *mv88e6060_probe(struct mii_bus *bus, int sw_addr) { int ret; - ret = mdiobus_read(bus, REG_PORT(0), 0x03); + ret = mdiobus_read(bus, sw_addr + REG_PORT(0), 0x03); if (ret >= 0) { ret &= 0xfff0; if (ret == 0x0600) -- cgit v1.1 From 7b46ac4e77f3224a1befe032c77f1df31d1b42c4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Mar 2011 14:59:28 -0800 Subject: inetpeer: Don't disable BH for initial fast RCU lookup. If modifications on other cpus are ok, then modifications to the tree during lookup done by the local cpu are ok too. Signed-off-by: David S. Miller --- net/ipv4/inetpeer.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index f604ffd..6442c35 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -206,16 +206,16 @@ static int addr_compare(const struct inetpeer_addr *a, }) /* - * Called with rcu_read_lock_bh() + * Called with rcu_read_lock() * Because we hold no lock against a writer, its quite possible we fall * in an endless loop. * But every pointer we follow is guaranteed to be valid thanks to RCU. 
* We exit from this function if number of links exceeds PEER_MAXDEPTH */ -static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr, - struct inet_peer_base *base) +static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr, + struct inet_peer_base *base) { - struct inet_peer *u = rcu_dereference_bh(base->root); + struct inet_peer *u = rcu_dereference(base->root); int count = 0; while (u != peer_avl_empty) { @@ -231,9 +231,9 @@ static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr, return u; } if (cmp == -1) - u = rcu_dereference_bh(u->avl_left); + u = rcu_dereference(u->avl_left); else - u = rcu_dereference_bh(u->avl_right); + u = rcu_dereference(u->avl_right); if (unlikely(++count == PEER_MAXDEPTH)) break; } @@ -470,11 +470,11 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) /* Look up for the address quickly, lockless. * Because of a concurrent writer, we might not find an existing entry. */ - rcu_read_lock_bh(); + rcu_read_lock(); sequence = read_seqbegin(&base->lock); - p = lookup_rcu_bh(daddr, base); + p = lookup_rcu(daddr, base); invalidated = read_seqretry(&base->lock, sequence); - rcu_read_unlock_bh(); + rcu_read_unlock(); if (p) { /* The existing node has been found. -- cgit v1.1 From adb00ae2ea0ec65f9d3d06079950c0f0ade3b614 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 9 Mar 2011 14:14:26 +0100 Subject: netfilter: x_tables: misuse of try_then_request_module Since xt_find_match() returns ERR_PTR(xx) on error not NULL, the macro try_then_request_module won't work correctly here. The macro expects its first argument will be zero if condition fails. But ERR_PTR(-ENOENT) is not zero. The correct solution is to propagate the error value back. Found by inspection, and compile tested only. 
Signed-off-by: Stephen Hemminger Signed-off-by: Patrick McHardy --- net/netfilter/x_tables.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 0a77d2f..271eed3 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -183,7 +183,7 @@ EXPORT_SYMBOL(xt_unregister_matches); /* * These are weird, but module loading must not be done with mutex * held (since they will register), and we have to have a single - * function to use try_then_request_module(). + * function to use. */ /* Find match, grabs ref. Returns ERR_PTR() on error. */ @@ -221,9 +221,13 @@ xt_request_find_match(uint8_t nfproto, const char *name, uint8_t revision) { struct xt_match *match; - match = try_then_request_module(xt_find_match(nfproto, name, revision), - "%st_%s", xt_prefix[nfproto], name); - return (match != NULL) ? match : ERR_PTR(-ENOENT); + match = xt_find_match(nfproto, name, revision); + if (IS_ERR(match)) { + request_module("%st_%s", xt_prefix[nfproto], name); + match = xt_find_match(nfproto, name, revision); + } + + return match; } EXPORT_SYMBOL_GPL(xt_request_find_match); @@ -261,9 +265,13 @@ struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision) { struct xt_target *target; - target = try_then_request_module(xt_find_target(af, name, revision), - "%st_%s", xt_prefix[af], name); - return (target != NULL) ? 
target : ERR_PTR(-ENOENT); + target = xt_find_target(af, name, revision); + if (IS_ERR(target)) { + request_module("%st_%s", xt_prefix[af], name); + target = xt_find_target(af, name, revision); + } + + return target; } EXPORT_SYMBOL_GPL(xt_request_find_target); -- cgit v1.1 From c69d4407d8884e8a127f95d07b1896443f3716ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Tue, 8 Mar 2011 22:44:06 +0000 Subject: Phonet: fix NULL dereference on TX path with implicit source MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous Phonet patch series introduced per-socket implicit destination (i.e. connect()). In that case, the destination socket address is NULL in the transmit function. However commit a8059512b120362b15424f152b2548fe8b11bd0c ("Phonet: implement per-socket destination/peer address") is incomplete and would trigger a NULL dereference. (Fortunately, the code is not in released kernel, and in fact currently not reachable.) Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. 
Miller --- net/phonet/af_phonet.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 30cc676..4706b77 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -262,10 +262,9 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb, else if (phonet_address_lookup(net, daddr) == 0) { dev = phonet_device_get(net); skb->pkt_type = PACKET_LOOPBACK; - } else if (pn_sockaddr_get_object(target) == 0) { + } else if (dst == 0) { /* Resource routing (small race until phonet_rcv()) */ - struct sock *sk = pn_find_sock_by_res(net, - target->spn_resource); + struct sock *sk = pn_find_sock_by_res(net, res); if (sk) { sock_put(sk); dev = phonet_device_get(net); -- cgit v1.1 From b765e84f96f728e8e178348fc102f126c1736193 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Tue, 8 Mar 2011 22:44:07 +0000 Subject: Phonet: return an error when packet TX fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phonet assumes that packets are never dropped. We try our best to avoid this situation. But lets return ENOBUFS if queueing to the network device fails so that the caller knows things went wrong. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/af_phonet.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 4706b77..c6fffd9 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -195,11 +195,7 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev, if (skb->pkt_type == PACKET_LOOPBACK) { skb_reset_mac_header(skb); skb_orphan(skb); - if (irq) - netif_rx(skb); - else - netif_rx_ni(skb); - err = 0; + err = (irq ? netif_rx(skb) : netif_rx_ni(skb)) ? 
-ENOBUFS : 0; } else { err = dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, skb->len); @@ -208,6 +204,8 @@ static int pn_send(struct sk_buff *skb, struct net_device *dev, goto drop; } err = dev_queue_xmit(skb); + if (unlikely(err > 0)) + err = net_xmit_errno(err); } return err; -- cgit v1.1 From 0ebbf318635bf354bdb046419dd10e9a00667f37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Tue, 8 Mar 2011 22:44:08 +0000 Subject: Phonet: correct pipe backlog callback return values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In some cases, the Phonet pipe backlog callbacks returned negative errno instead of NET_RX_* values. In other cases, NET_RX_DROP was returned for invalid packets, even though it seems only intended for buffering problems (not for deliberately discarded packets). Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pep.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 875e86c..40952c7 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -522,7 +522,8 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) if (!pn_flow_safe(pn->rx_fc)) { err = sock_queue_rcv_skb(sk, skb); if (!err) - return 0; + return NET_RX_SUCCESS; + err = -ENOBUFS; break; } @@ -575,7 +576,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) } out: kfree_skb(skb); - return err; + return (err == -ENOBUFS) ? NET_RX_DROP : NET_RX_SUCCESS; queue: skb->dev = NULL; @@ -584,7 +585,7 @@ queue: skb_queue_tail(queue, skb); if (!sock_flag(sk, SOCK_DEAD)) sk->sk_data_ready(sk, err); - return 0; + return NET_RX_SUCCESS; } /* Destroy connected sock. 
*/ @@ -686,11 +687,6 @@ static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) } peer_type = hdr->other_pep_type << 8; - if (unlikely(sk->sk_state != TCP_LISTEN) || sk_acceptq_is_full(sk)) { - pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE); - return -ENOBUFS; - } - /* Parse sub-blocks (options) */ n_sb = hdr->data[4]; while (n_sb > 0) { @@ -790,7 +786,6 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb) struct sock *sknode; struct pnpipehdr *hdr; struct sockaddr_pn dst; - int err = NET_RX_SUCCESS; u8 pipe_handle; if (!pskb_may_pull(skb, sizeof(*hdr))) @@ -814,18 +809,20 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb) sock_put(sknode); if (net_ratelimit()) printk(KERN_WARNING"Phonet unconnected PEP ignored"); - err = NET_RX_DROP; goto drop; } switch (hdr->message_id) { case PNS_PEP_CONNECT_REQ: - err = pep_connreq_rcv(sk, skb); + if (sk->sk_state == TCP_LISTEN && !sk_acceptq_is_full(sk)) + pep_connreq_rcv(sk, skb); + else + pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE); break; #ifdef CONFIG_PHONET_PIPECTRLR case PNS_PEP_CONNECT_RESP: - err = pep_connresp_rcv(sk, skb); + pep_connresp_rcv(sk, skb); break; #endif @@ -842,11 +839,11 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb) case PNS_PEP_DISABLE_REQ: /* invalid handle is not even allowed here! */ default: - err = NET_RX_DROP; + break; } drop: kfree_skb(skb); - return err; + return NET_RX_SUCCESS; } #ifndef CONFIG_PHONET_PIPECTRLR -- cgit v1.1 From 44c9ab16d29a50af6ed9ae084b75774570de512a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Tue, 8 Mar 2011 22:44:09 +0000 Subject: Phonet: factor common code to send control messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the addition of the pipe controller, there is now quite a bit of repetitive code for small signaling messages. Lets factor it. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. 
Miller --- net/phonet/pep.c | 225 ++++++++++++++++++------------------------------------- 1 file changed, 73 insertions(+), 152 deletions(-) (limited to 'net') diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 40952c7..610794a 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -77,24 +77,34 @@ static unsigned char *pep_get_sb(struct sk_buff *skb, u8 *ptype, u8 *plen, return data; } -static int pep_reply(struct sock *sk, struct sk_buff *oskb, - u8 code, const void *data, int len, gfp_t priority) +static struct sk_buff *pep_alloc_skb(struct sock *sk, const void *payload, + int len, gfp_t priority) +{ + struct sk_buff *skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority); + if (!skb) + return NULL; + skb_set_owner_w(skb, sk); + + skb_reserve(skb, MAX_PNPIPE_HEADER); + __skb_put(skb, len); + skb_copy_to_linear_data(skb, payload, len); + __skb_push(skb, sizeof(struct pnpipehdr)); + skb_reset_transport_header(skb); + return skb; +} + +static int pep_reply(struct sock *sk, struct sk_buff *oskb, u8 code, + const void *data, int len, gfp_t priority) { const struct pnpipehdr *oph = pnp_hdr(oskb); struct pnpipehdr *ph; struct sk_buff *skb; struct sockaddr_pn peer; - skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority); + skb = pep_alloc_skb(sk, data, len, priority); if (!skb) return -ENOMEM; - skb_set_owner_w(skb, sk); - skb_reserve(skb, MAX_PNPIPE_HEADER); - __skb_put(skb, len); - skb_copy_to_linear_data(skb, data, len); - __skb_push(skb, sizeof(*ph)); - skb_reset_transport_header(skb); ph = pnp_hdr(skb); ph->utid = oph->utid; ph->message_id = oph->message_id + 1; /* REQ -> RESP */ @@ -105,135 +115,69 @@ static int pep_reply(struct sock *sk, struct sk_buff *oskb, return pn_skb_send(sk, skb, &peer); } -#define PAD 0x00 - -#ifdef CONFIG_PHONET_PIPECTRLR -static int pipe_handler_send_req(struct sock *sk, u8 msg_id, gfp_t priority) +static int pep_indicate(struct sock *sk, u8 id, u8 code, + const void *data, int len, gfp_t priority) { - int len; + struct pep_sock *pn = 
pep_sk(sk); struct pnpipehdr *ph; struct sk_buff *skb; - struct pep_sock *pn = pep_sk(sk); - - static const u8 data[4] = { - PAD, PAD, PAD, PAD, - }; - switch (msg_id) { - case PNS_PEP_CONNECT_REQ: - len = sizeof(data); - break; - - case PNS_PEP_DISCONNECT_REQ: - case PNS_PEP_ENABLE_REQ: - case PNS_PEP_DISABLE_REQ: - len = 0; - break; - - default: - return -EINVAL; - } - - skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority); + skb = pep_alloc_skb(sk, data, len, priority); if (!skb) return -ENOMEM; - skb_set_owner_w(skb, sk); - skb_reserve(skb, MAX_PNPIPE_HEADER); - if (len) { - __skb_put(skb, len); - skb_copy_to_linear_data(skb, data, len); - } - __skb_push(skb, sizeof(*ph)); - skb_reset_transport_header(skb); ph = pnp_hdr(skb); - ph->utid = msg_id; /* whatever */ - ph->message_id = msg_id; + ph->utid = 0; + ph->message_id = id; ph->pipe_handle = pn->pipe_handle; - ph->error_code = PN_PIPE_NO_ERROR; - + ph->data[0] = code; return pn_skb_send(sk, skb, NULL); } -static int pipe_handler_send_created_ind(struct sock *sk, u8 msg_id) +#define PAD 0x00 + +#ifdef CONFIG_PHONET_PIPECTRLR +static int pipe_handler_request(struct sock *sk, u8 id, u8 code, + const void *data, int len) { - int err_code; + struct pep_sock *pn = pep_sk(sk); struct pnpipehdr *ph; struct sk_buff *skb; - struct pep_sock *pn = pep_sk(sk); - static u8 data[4] = { - 0x03, 0x04, - }; - data[2] = pn->tx_fc; - data[3] = pn->rx_fc; - - /* - * actually, below is number of sub-blocks and not error code. - * Pipe_created_ind message format does not have any - * error code field. However, the Phonet stack will always send - * an error code as part of pnpipehdr. So, use that err_code to - * specify the number of sub-blocks. 
- */ - err_code = 0x01; - - skb = alloc_skb(MAX_PNPIPE_HEADER + sizeof(data), GFP_ATOMIC); + skb = pep_alloc_skb(sk, data, len, GFP_KERNEL); if (!skb) return -ENOMEM; - skb_set_owner_w(skb, sk); - skb_reserve(skb, MAX_PNPIPE_HEADER); - __skb_put(skb, sizeof(data)); - skb_copy_to_linear_data(skb, data, sizeof(data)); - __skb_push(skb, sizeof(*ph)); - skb_reset_transport_header(skb); ph = pnp_hdr(skb); - ph->utid = 0; - ph->message_id = msg_id; + ph->utid = id; /* whatever */ + ph->message_id = id; ph->pipe_handle = pn->pipe_handle; - ph->error_code = err_code; - + ph->data[0] = code; return pn_skb_send(sk, skb, NULL); } -static int pipe_handler_send_ind(struct sock *sk, u8 msg_id) +static int pipe_handler_send_created_ind(struct sock *sk) { - int err_code; - struct pnpipehdr *ph; - struct sk_buff *skb; struct pep_sock *pn = pep_sk(sk); + u8 data[4] = { + PN_PIPE_SB_NEGOTIATED_FC, pep_sb_size(2), + pn->tx_fc, pn->rx_fc, + }; - /* - * actually, below is a filler. - * Pipe_enabled/disabled_ind message format does not have any - * error code field. However, the Phonet stack will always send - * an error code as part of pnpipehdr. So, use that err_code to - * specify the filler value. - */ - err_code = 0x0; - - skb = alloc_skb(MAX_PNPIPE_HEADER, GFP_ATOMIC); - if (!skb) - return -ENOMEM; - skb_set_owner_w(skb, sk); - - skb_reserve(skb, MAX_PNPIPE_HEADER); - __skb_push(skb, sizeof(*ph)); - skb_reset_transport_header(skb); - ph = pnp_hdr(skb); - ph->utid = 0; - ph->message_id = msg_id; - ph->pipe_handle = pn->pipe_handle; - ph->error_code = err_code; + return pep_indicate(sk, PNS_PIPE_CREATED_IND, 1 /* sub-blocks */, + data, 4, GFP_ATOMIC); +} - return pn_skb_send(sk, skb, NULL); +static int pipe_handler_send_ind(struct sock *sk, u8 id) +{ + return pep_indicate(sk, id, PAD, NULL, 0, GFP_ATOMIC); } static int pipe_handler_enable_pipe(struct sock *sk, int enable) { u8 id = enable ? 
PNS_PEP_ENABLE_REQ : PNS_PEP_DISABLE_REQ; - return pipe_handler_send_req(sk, id, GFP_KERNEL); + return pipe_handler_request(sk, id, PAD, NULL, 0); } #endif @@ -274,23 +218,21 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code, struct sk_buff *skb; struct pnpipehdr *ph; struct sockaddr_pn dst; + u8 data[4] = { + oph->data[0], /* PEP type */ + code, /* error code, at an unusual offset */ + PAD, PAD, + }; - skb = alloc_skb(MAX_PNPIPE_HEADER + 4, priority); + skb = pep_alloc_skb(sk, data, 4, priority); if (!skb) return -ENOMEM; - skb_set_owner_w(skb, sk); - - skb_reserve(skb, MAX_PHONET_HEADER); - ph = (struct pnpipehdr *)skb_put(skb, sizeof(*ph) + 4); + ph = pnp_hdr(skb); ph->utid = oph->utid; ph->message_id = PNS_PEP_CTRL_RESP; ph->pipe_handle = oph->pipe_handle; ph->data[0] = oph->data[1]; /* CTRL id */ - ph->data[1] = oph->data[0]; /* PEP type */ - ph->data[2] = code; /* error code, at an usual offset */ - ph->data[3] = PAD; - ph->data[4] = PAD; pn_skb_get_src_sockaddr(oskb, &dst); return pn_skb_send(sk, skb, &dst); @@ -298,34 +240,15 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code, static int pipe_snd_status(struct sock *sk, u8 type, u8 status, gfp_t priority) { - struct pep_sock *pn = pep_sk(sk); - struct pnpipehdr *ph; - struct sk_buff *skb; + u8 data[4] = { type, PAD, PAD, status }; - skb = alloc_skb(MAX_PNPIPE_HEADER + 4, priority); - if (!skb) - return -ENOMEM; - skb_set_owner_w(skb, sk); - - skb_reserve(skb, MAX_PNPIPE_HEADER + 4); - __skb_push(skb, sizeof(*ph) + 4); - skb_reset_transport_header(skb); - ph = pnp_hdr(skb); - ph->utid = 0; - ph->message_id = PNS_PEP_STATUS_IND; - ph->pipe_handle = pn->pipe_handle; - ph->pep_type = PN_PEP_TYPE_COMMON; - ph->data[1] = type; - ph->data[2] = PAD; - ph->data[3] = PAD; - ph->data[4] = status; - - return pn_skb_send(sk, skb, NULL); + return pep_indicate(sk, PNS_PEP_STATUS_IND, PN_PEP_TYPE_COMMON, + data, 4, priority); } /* Send our RX flow control information 
to the sender. * Socket must be locked. */ -static void pipe_grant_credits(struct sock *sk) +static void pipe_grant_credits(struct sock *sk, gfp_t priority) { struct pep_sock *pn = pep_sk(sk); @@ -335,16 +258,16 @@ static void pipe_grant_credits(struct sock *sk) case PN_LEGACY_FLOW_CONTROL: /* TODO */ break; case PN_ONE_CREDIT_FLOW_CONTROL: - pipe_snd_status(sk, PN_PEP_IND_FLOW_CONTROL, - PEP_IND_READY, GFP_ATOMIC); - pn->rx_credits = 1; + if (pipe_snd_status(sk, PN_PEP_IND_FLOW_CONTROL, + PEP_IND_READY, priority) == 0) + pn->rx_credits = 1; break; case PN_MULTI_CREDIT_FLOW_CONTROL: if ((pn->rx_credits + CREDITS_THR) > CREDITS_MAX) break; if (pipe_snd_status(sk, PN_PEP_IND_ID_MCFC_GRANT_CREDITS, CREDITS_MAX - pn->rx_credits, - GFP_ATOMIC) == 0) + priority) == 0) pn->rx_credits = CREDITS_MAX; break; } @@ -474,7 +397,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) if (sk->sk_state == TCP_ESTABLISHED) break; /* Nothing to do */ sk->sk_state = TCP_ESTABLISHED; - pipe_grant_credits(sk); + pipe_grant_credits(sk, GFP_ATOMIC); break; #endif @@ -561,7 +484,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) if (sk->sk_state == TCP_ESTABLISHED) break; /* Nothing to do */ sk->sk_state = TCP_ESTABLISHED; - pipe_grant_credits(sk); + pipe_grant_credits(sk, GFP_ATOMIC); break; case PNS_PIPE_DISABLED_IND: @@ -655,7 +578,7 @@ static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb) pn->rx_credits = 0; sk->sk_state_change(sk); - return pipe_handler_send_created_ind(sk, PNS_PIPE_CREATED_IND); + return pipe_handler_send_created_ind(sk); } #endif @@ -853,19 +776,15 @@ static int pipe_do_remove(struct sock *sk) struct pnpipehdr *ph; struct sk_buff *skb; - skb = alloc_skb(MAX_PNPIPE_HEADER, GFP_KERNEL); + skb = pep_alloc_skb(sk, NULL, 0, GFP_KERNEL); if (!skb) return -ENOMEM; - skb_reserve(skb, MAX_PNPIPE_HEADER); - __skb_push(skb, sizeof(*ph)); - skb_reset_transport_header(skb); ph = pnp_hdr(skb); ph->utid = 0; ph->message_id = 
PNS_PIPE_REMOVE_REQ; ph->pipe_handle = pn->pipe_handle; ph->data[0] = PAD; - return pn_skb_send(sk, skb, NULL); } #endif @@ -894,7 +813,7 @@ static void pep_sock_close(struct sock *sk, long timeout) pipe_do_remove(sk); #else /* send pep disconnect request */ - pipe_handler_send_req(sk, PNS_PEP_DISCONNECT_REQ, GFP_KERNEL); + pipe_handler_request(sk, PNS_PEP_DISCONNECT_REQ, PAD, NULL, 0); sk->sk_state = TCP_CLOSE; #endif } @@ -980,10 +899,12 @@ static int pep_sock_connect(struct sock *sk, struct sockaddr *addr, int len) { struct pep_sock *pn = pep_sk(sk); const struct sockaddr_pn *spn = (struct sockaddr_pn *)addr; + u8 data[4] = { 0 /* sub-blocks */, PAD, PAD, PAD }; pn->pn_sk.dobject = pn_sockaddr_get_object(spn); pn->pn_sk.resource = pn_sockaddr_get_resource(spn); - return pipe_handler_send_req(sk, PNS_PEP_CONNECT_REQ, GFP_KERNEL); + return pipe_handler_request(sk, PNS_PEP_CONNECT_REQ, + PN_PIPE_DISABLE, data, 4); } #endif @@ -1280,7 +1201,7 @@ struct sk_buff *pep_read(struct sock *sk) struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue); if (sk->sk_state == TCP_ESTABLISHED) - pipe_grant_credits(sk); + pipe_grant_credits(sk, GFP_ATOMIC); return skb; } @@ -1325,7 +1246,7 @@ static int pep_recvmsg(struct kiocb *iocb, struct sock *sk, } if (sk->sk_state == TCP_ESTABLISHED) - pipe_grant_credits(sk); + pipe_grant_credits(sk, GFP_KERNEL); release_sock(sk); copy: msg->msg_flags |= MSG_EOR; -- cgit v1.1 From f7ae8d59f66154df0424fd94035c89981fed3379 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Tue, 8 Mar 2011 22:44:10 +0000 Subject: Phonet: allocate sock from accept syscall rather than soft IRQ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This moves most of the accept logic to process context like other socket stacks do. Then we can use a few more common socket helpers and simplify a bit. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. 
Miller --- net/phonet/pep.c | 284 ++++++++++++++++++++++------------------------------ net/phonet/socket.c | 10 +- 2 files changed, 121 insertions(+), 173 deletions(-) (limited to 'net') diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 610794a..c0fab4c 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -42,7 +42,7 @@ * TCP_ESTABLISHED connected pipe in enabled state * * pep_sock locking: - * - sk_state, ackq, hlist: sock lock needed + * - sk_state, hlist: sock lock needed * - listener: read only * - pipe_handle: read only */ @@ -202,11 +202,12 @@ static int pep_accept_conn(struct sock *sk, struct sk_buff *skb) GFP_KERNEL); } -static int pep_reject_conn(struct sock *sk, struct sk_buff *skb, u8 code) +static int pep_reject_conn(struct sock *sk, struct sk_buff *skb, u8 code, + gfp_t priority) { static const u8 data[4] = { PAD, PAD, PAD, 0 /* sub-blocks */ }; WARN_ON(code == PN_PIPE_NO_ERROR); - return pep_reply(sk, skb, code, data, sizeof(data), GFP_ATOMIC); + return pep_reply(sk, skb, code, data, sizeof(data), priority); } /* Control requests are not sent by the pipe service and have a specific @@ -365,7 +366,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) switch (hdr->message_id) { case PNS_PEP_CONNECT_REQ: - pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE); + pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE, GFP_ATOMIC); break; case PNS_PEP_DISCONNECT_REQ: @@ -574,7 +575,6 @@ static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb) sk->sk_state = TCP_SYN_RECV; sk->sk_backlog_rcv = pipe_do_rcv; - sk->sk_destruct = pipe_destruct; pn->rx_credits = 0; sk->sk_state_change(sk); @@ -582,96 +582,6 @@ static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb) } #endif -static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) -{ - struct sock *newsk; - struct pep_sock *newpn, *pn = pep_sk(sk); - struct pnpipehdr *hdr; - struct sockaddr_pn dst, src; - u16 peer_type; - u8 pipe_handle, enabled, n_sb; - u8 aligned = 0; - - if 
(!pskb_pull(skb, sizeof(*hdr) + 4)) - return -EINVAL; - - hdr = pnp_hdr(skb); - pipe_handle = hdr->pipe_handle; - switch (hdr->state_after_connect) { - case PN_PIPE_DISABLE: - enabled = 0; - break; - case PN_PIPE_ENABLE: - enabled = 1; - break; - default: - pep_reject_conn(sk, skb, PN_PIPE_ERR_INVALID_PARAM); - return -EINVAL; - } - peer_type = hdr->other_pep_type << 8; - - /* Parse sub-blocks (options) */ - n_sb = hdr->data[4]; - while (n_sb > 0) { - u8 type, buf[1], len = sizeof(buf); - const u8 *data = pep_get_sb(skb, &type, &len, buf); - - if (data == NULL) - return -EINVAL; - switch (type) { - case PN_PIPE_SB_CONNECT_REQ_PEP_SUB_TYPE: - if (len < 1) - return -EINVAL; - peer_type = (peer_type & 0xff00) | data[0]; - break; - case PN_PIPE_SB_ALIGNED_DATA: - aligned = data[0] != 0; - break; - } - n_sb--; - } - - skb = skb_clone(skb, GFP_ATOMIC); - if (!skb) - return -ENOMEM; - - /* Create a new to-be-accepted sock */ - newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_ATOMIC, sk->sk_prot); - if (!newsk) { - kfree_skb(skb); - return -ENOMEM; - } - sock_init_data(NULL, newsk); - newsk->sk_state = TCP_SYN_RECV; - newsk->sk_backlog_rcv = pipe_do_rcv; - newsk->sk_protocol = sk->sk_protocol; - newsk->sk_destruct = pipe_destruct; - - newpn = pep_sk(newsk); - pn_skb_get_dst_sockaddr(skb, &dst); - pn_skb_get_src_sockaddr(skb, &src); - newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst); - newpn->pn_sk.dobject = pn_sockaddr_get_object(&src); - newpn->pn_sk.resource = pn_sockaddr_get_resource(&dst); - skb_queue_head_init(&newpn->ctrlreq_queue); - newpn->pipe_handle = pipe_handle; - atomic_set(&newpn->tx_credits, 0); - newpn->peer_type = peer_type; - newpn->rx_credits = 0; - newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL; - newpn->init_enable = enabled; - newpn->aligned = aligned; - - BUG_ON(!skb_queue_empty(&newsk->sk_receive_queue)); - skb_queue_head(&newsk->sk_receive_queue, skb); - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk, 0); - - sk_acceptq_added(sk); - 
sk_add_node(newsk, &pn->ackq); - return 0; -} - /* Listening sock must be locked */ static struct sock *pep_find_pipe(const struct hlist_head *hlist, const struct sockaddr_pn *dst, @@ -726,22 +636,18 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb) if (sknode) return sk_receive_skb(sknode, skb, 1); - /* Look for a pipe handle pending accept */ - sknode = pep_find_pipe(&pn->ackq, &dst, pipe_handle); - if (sknode) { - sock_put(sknode); - if (net_ratelimit()) - printk(KERN_WARNING"Phonet unconnected PEP ignored"); - goto drop; - } - switch (hdr->message_id) { case PNS_PEP_CONNECT_REQ: - if (sk->sk_state == TCP_LISTEN && !sk_acceptq_is_full(sk)) - pep_connreq_rcv(sk, skb); - else - pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE); - break; + if (sk->sk_state != TCP_LISTEN || sk_acceptq_is_full(sk)) { + pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE, + GFP_ATOMIC); + break; + } + skb_queue_head(&sk->sk_receive_queue, skb); + sk_acceptq_added(sk); + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk, 0); + return NET_RX_SUCCESS; #ifdef CONFIG_PHONET_PIPECTRLR case PNS_PEP_CONNECT_RESP: @@ -799,24 +705,16 @@ static void pep_sock_close(struct sock *sk, long timeout) sk_common_release(sk); lock_sock(sk); - if (sk->sk_state == TCP_LISTEN) { - /* Destroy the listen queue */ - struct sock *sknode; - struct hlist_node *p, *n; - - sk_for_each_safe(sknode, p, n, &pn->ackq) - sk_del_node_init(sknode); - sk->sk_state = TCP_CLOSE; - } else if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED)) { + if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED)) { #ifndef CONFIG_PHONET_PIPECTRLR /* Forcefully remove dangling Phonet pipe */ pipe_do_remove(sk); #else /* send pep disconnect request */ pipe_handler_request(sk, PNS_PEP_DISCONNECT_REQ, PAD, NULL, 0); - sk->sk_state = TCP_CLOSE; #endif } + sk->sk_state = TCP_CLOSE; ifindex = pn->ifindex; pn->ifindex = 0; @@ -827,69 +725,121 @@ static void pep_sock_close(struct sock *sk, long timeout) sock_put(sk); } 
-static int pep_wait_connreq(struct sock *sk, int noblock) +static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp) { - struct task_struct *tsk = current; - struct pep_sock *pn = pep_sk(sk); - long timeo = sock_rcvtimeo(sk, noblock); - - for (;;) { - DEFINE_WAIT(wait); + struct pep_sock *pn = pep_sk(sk), *newpn; + struct sock *newsk = NULL; + struct sk_buff *skb; + struct pnpipehdr *hdr; + struct sockaddr_pn dst, src; + int err; + u16 peer_type; + u8 pipe_handle, enabled, n_sb; + u8 aligned = 0; - if (sk->sk_state != TCP_LISTEN) - return -EINVAL; - if (!hlist_empty(&pn->ackq)) - break; - if (!timeo) - return -EWOULDBLOCK; - if (signal_pending(tsk)) - return sock_intr_errno(timeo); + skb = skb_recv_datagram(sk, 0, flags & O_NONBLOCK, errp); + if (!skb) + return NULL; - prepare_to_wait_exclusive(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); - release_sock(sk); - timeo = schedule_timeout(timeo); - lock_sock(sk); - finish_wait(sk_sleep(sk), &wait); + lock_sock(sk); + if (sk->sk_state != TCP_LISTEN) { + err = -EINVAL; + goto drop; } + sk_acceptq_removed(sk); - return 0; -} + err = -EPROTO; + if (!pskb_may_pull(skb, sizeof(*hdr) + 4)) + goto drop; -static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp) -{ - struct pep_sock *pn = pep_sk(sk); - struct sock *newsk = NULL; - struct sk_buff *oskb; - int err; + hdr = pnp_hdr(skb); + pipe_handle = hdr->pipe_handle; + switch (hdr->state_after_connect) { + case PN_PIPE_DISABLE: + enabled = 0; + break; + case PN_PIPE_ENABLE: + enabled = 1; + break; + default: + pep_reject_conn(sk, skb, PN_PIPE_ERR_INVALID_PARAM, + GFP_KERNEL); + goto drop; + } + peer_type = hdr->other_pep_type << 8; - lock_sock(sk); - err = pep_wait_connreq(sk, flags & O_NONBLOCK); - if (err) - goto out; + /* Parse sub-blocks (options) */ + n_sb = hdr->data[4]; + while (n_sb > 0) { + u8 type, buf[1], len = sizeof(buf); + const u8 *data = pep_get_sb(skb, &type, &len, buf); - newsk = __sk_head(&pn->ackq); + if (data == NULL) + 
goto drop; + switch (type) { + case PN_PIPE_SB_CONNECT_REQ_PEP_SUB_TYPE: + if (len < 1) + goto drop; + peer_type = (peer_type & 0xff00) | data[0]; + break; + case PN_PIPE_SB_ALIGNED_DATA: + aligned = data[0] != 0; + break; + } + n_sb--; + } - oskb = skb_dequeue(&newsk->sk_receive_queue); - err = pep_accept_conn(newsk, oskb); - if (err) { - skb_queue_head(&newsk->sk_receive_queue, oskb); + /* Check for duplicate pipe handle */ + newsk = pep_find_pipe(&pn->hlist, &dst, pipe_handle); + if (unlikely(newsk)) { + __sock_put(newsk); newsk = NULL; - goto out; + pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE, GFP_KERNEL); + goto drop; + } + + /* Create a new to-be-accepted sock */ + newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot); + if (!newsk) { + pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL); + err = -ENOBUFS; + goto drop; } - kfree_skb(oskb); + sock_init_data(NULL, newsk); + newsk->sk_state = TCP_SYN_RECV; + newsk->sk_backlog_rcv = pipe_do_rcv; + newsk->sk_protocol = sk->sk_protocol; + newsk->sk_destruct = pipe_destruct; + + newpn = pep_sk(newsk); + pn_skb_get_dst_sockaddr(skb, &dst); + pn_skb_get_src_sockaddr(skb, &src); + newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst); + newpn->pn_sk.dobject = pn_sockaddr_get_object(&src); + newpn->pn_sk.resource = pn_sockaddr_get_resource(&dst); sock_hold(sk); - pep_sk(newsk)->listener = sk; + newpn->listener = sk; + skb_queue_head_init(&newpn->ctrlreq_queue); + newpn->pipe_handle = pipe_handle; + atomic_set(&newpn->tx_credits, 0); + newpn->ifindex = 0; + newpn->peer_type = peer_type; + newpn->rx_credits = 0; + newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL; + newpn->init_enable = enabled; + newpn->aligned = aligned; - sock_hold(newsk); - sk_del_node_init(newsk); - sk_acceptq_removed(sk); + err = pep_accept_conn(newsk, skb); + if (err) { + sock_put(newsk); + newsk = NULL; + goto drop; + } sk_add_node(newsk, &pn->hlist); - __sock_put(newsk); - -out: +drop: release_sock(sk); + kfree_skb(skb); 
*errp = err; return newsk; } @@ -937,7 +887,7 @@ static int pep_init(struct sock *sk) { struct pep_sock *pn = pep_sk(sk); - INIT_HLIST_HEAD(&pn->ackq); + sk->sk_destruct = pipe_destruct; INIT_HLIST_HEAD(&pn->hlist); skb_queue_head_init(&pn->ctrlreq_queue); pn->pipe_handle = PN_PIPE_INVALID_HANDLE; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 65a0333..1eccfc3 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -327,6 +327,9 @@ static int pn_socket_accept(struct socket *sock, struct socket *newsock, struct sock *newsk; int err; + if (unlikely(sk->sk_state != TCP_LISTEN)) + return -EINVAL; + newsk = sk->sk_prot->accept(sk, flags, &err); if (!newsk) return err; @@ -363,13 +366,8 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock, poll_wait(file, sk_sleep(sk), wait); - switch (sk->sk_state) { - case TCP_LISTEN: - return hlist_empty(&pn->ackq) ? 0 : POLLIN; - case TCP_CLOSE: + if (sk->sk_state == TCP_CLOSE) return POLLERR; - } - if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; if (!skb_queue_empty(&pn->ctrlreq_queue)) -- cgit v1.1 From acaf7df610ff3faf1778ce40d601fc3dd4a41b40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Tue, 8 Mar 2011 22:44:11 +0000 Subject: Phonet: provide pipe socket option to retrieve the pipe identifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User-space sometimes needs this information. In particular, the GPRS context or the AT commands pipe setups may use the pipe handle as a reference. This removes the settable pipe handle with CONFIG_PHONET_PIPECTRLR. It did not handle error cases correctly. Furthermore, the kernel *could* implement a smart scheme for allocating handles (if ever needed), but userspace really cannot. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. 
Miller --- net/phonet/pep.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/phonet/pep.c b/net/phonet/pep.c index c0fab4c..abfb795 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -853,6 +853,7 @@ static int pep_sock_connect(struct sock *sk, struct sockaddr *addr, int len) pn->pn_sk.dobject = pn_sockaddr_get_object(spn); pn->pn_sk.resource = pn_sockaddr_get_resource(spn); + pn->pipe_handle = 1; /* anything but INVALID_HANDLE */ return pipe_handler_request(sk, PNS_PEP_CONNECT_REQ, PN_PIPE_DISABLE, data, 4); } @@ -909,14 +910,6 @@ static int pep_setsockopt(struct sock *sk, int level, int optname, lock_sock(sk); switch (optname) { -#ifdef CONFIG_PHONET_PIPECTRLR - case PNPIPE_PIPE_HANDLE: - if (val) { - pn->pipe_handle = val; - break; - } -#endif - case PNPIPE_ENCAP: if (val && val != PNPIPE_ENCAP_IP) { err = -EINVAL; @@ -982,6 +975,12 @@ static int pep_getsockopt(struct sock *sk, int level, int optname, val = pn->ifindex; break; + case PNPIPE_HANDLE: + val = pn->pipe_handle; + if (val == PN_PIPE_INVALID_HANDLE) + return -EINVAL; + break; + #ifdef CONFIG_PHONET_PIPECTRLR case PNPIPE_ENABLE: val = sk->sk_state == TCP_ESTABLISHED; -- cgit v1.1 From 297edb6003268c1d60da8c21eb76bf39b6428213 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Tue, 8 Mar 2011 22:44:12 +0000 Subject: Phonet: support active connection without pipe controller on modem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This provides support for newer ISI modems with no need for the earlier experimental compile-time alternative choice. With this, we can now use the same kernel and userspace with both types of modems. This also avoids confusing two different and incompatible state machines, actively connected vs accepted sockets, and adds connection response error handling (processing "SYN/RST" of sorts). Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. 
Miller --- net/phonet/pep.c | 172 ++++++++++++++++++++++++++++++---------------------- net/phonet/socket.c | 102 ++++++++++++------------------- 2 files changed, 141 insertions(+), 133 deletions(-) (limited to 'net') diff --git a/net/phonet/pep.c b/net/phonet/pep.c index abfb795..671effb 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -136,7 +136,6 @@ static int pep_indicate(struct sock *sk, u8 id, u8 code, #define PAD 0x00 -#ifdef CONFIG_PHONET_PIPECTRLR static int pipe_handler_request(struct sock *sk, u8 id, u8 code, const void *data, int len) { @@ -168,11 +167,7 @@ static int pipe_handler_send_created_ind(struct sock *sk) data, 4, GFP_ATOMIC); } -static int pipe_handler_send_ind(struct sock *sk, u8 id) -{ - return pep_indicate(sk, id, PAD, NULL, 0, GFP_ATOMIC); -} - +#ifdef CONFIG_PHONET_PIPECTRLR static int pipe_handler_enable_pipe(struct sock *sk, int enable) { u8 id = enable ? PNS_PEP_ENABLE_REQ : PNS_PEP_DISABLE_REQ; @@ -376,32 +371,11 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) sk->sk_state_change(sk); break; -#ifdef CONFIG_PHONET_PIPECTRLR - case PNS_PEP_DISCONNECT_RESP: - sk->sk_state = TCP_CLOSE; - break; -#endif - case PNS_PEP_ENABLE_REQ: /* Wait for PNS_PIPE_(ENABLED|REDIRECTED)_IND */ pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); break; -#ifdef CONFIG_PHONET_PIPECTRLR - case PNS_PEP_ENABLE_RESP: - pipe_handler_send_ind(sk, PNS_PIPE_ENABLED_IND); - - if (!pn_flow_safe(pn->tx_fc)) { - atomic_set(&pn->tx_credits, 1); - sk->sk_write_space(sk); - } - if (sk->sk_state == TCP_ESTABLISHED) - break; /* Nothing to do */ - sk->sk_state = TCP_ESTABLISHED; - pipe_grant_credits(sk, GFP_ATOMIC); - break; -#endif - case PNS_PEP_RESET_REQ: switch (hdr->state_after_reset) { case PN_PIPE_DISABLE: @@ -420,15 +394,6 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); break; -#ifdef CONFIG_PHONET_PIPECTRLR - case PNS_PEP_DISABLE_RESP: - 
atomic_set(&pn->tx_credits, 0); - pipe_handler_send_ind(sk, PNS_PIPE_DISABLED_IND); - sk->sk_state = TCP_SYN_RECV; - pn->rx_credits = 0; - break; -#endif - case PNS_PEP_CTRL_REQ: if (skb_queue_len(&pn->ctrlreq_queue) >= PNPIPE_CTRLREQ_MAX) { atomic_inc(&sk->sk_drops); @@ -521,7 +486,6 @@ static void pipe_destruct(struct sock *sk) skb_queue_purge(&pn->ctrlreq_queue); } -#ifdef CONFIG_PHONET_PIPECTRLR static u8 pipe_negotiate_fc(const u8 *fcs, unsigned n) { unsigned i; @@ -546,6 +510,8 @@ static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb) return -EINVAL; hdr = pnp_hdr(skb); + if (hdr->error_code != PN_PIPE_NO_ERROR) + return -ECONNREFUSED; /* Parse sub-blocks */ n_sb = hdr->data[4]; @@ -573,14 +539,74 @@ static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb) n_sb--; } - sk->sk_state = TCP_SYN_RECV; - sk->sk_backlog_rcv = pipe_do_rcv; - pn->rx_credits = 0; - sk->sk_state_change(sk); - return pipe_handler_send_created_ind(sk); } -#endif + +/* Queue an skb to an actively connected sock. + * Socket lock must be held. 
*/ +static int pipe_handler_do_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct pep_sock *pn = pep_sk(sk); + struct pnpipehdr *hdr = pnp_hdr(skb); + int err = NET_RX_SUCCESS; + + switch (hdr->message_id) { + case PNS_PIPE_ALIGNED_DATA: + __skb_pull(skb, 1); + /* fall through */ + case PNS_PIPE_DATA: + __skb_pull(skb, 3); /* Pipe data header */ + if (!pn_flow_safe(pn->rx_fc)) { + err = sock_queue_rcv_skb(sk, skb); + if (!err) + return NET_RX_SUCCESS; + err = NET_RX_DROP; + break; + } + + if (pn->rx_credits == 0) { + atomic_inc(&sk->sk_drops); + err = NET_RX_DROP; + break; + } + pn->rx_credits--; + skb->dev = NULL; + skb_set_owner_r(skb, sk); + err = skb->len; + skb_queue_tail(&sk->sk_receive_queue, skb); + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk, err); + return NET_RX_SUCCESS; + + case PNS_PEP_CONNECT_RESP: + if (sk->sk_state != TCP_SYN_SENT) + break; + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_state_change(sk); + if (pep_connresp_rcv(sk, skb)) { + sk->sk_state = TCP_CLOSE_WAIT; + break; + } + + sk->sk_state = TCP_ESTABLISHED; + if (!pn_flow_safe(pn->tx_fc)) { + atomic_set(&pn->tx_credits, 1); + sk->sk_write_space(sk); + } + pipe_grant_credits(sk, GFP_ATOMIC); + break; + + case PNS_PEP_DISCONNECT_RESP: + /* sock should already be dead, nothing to do */ + break; + + case PNS_PEP_STATUS_IND: + pipe_rcv_status(sk, skb); + break; + } + kfree_skb(skb); + return err; +} /* Listening sock must be locked */ static struct sock *pep_find_pipe(const struct hlist_head *hlist, @@ -649,12 +675,6 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb) sk->sk_data_ready(sk, 0); return NET_RX_SUCCESS; -#ifdef CONFIG_PHONET_PIPECTRLR - case PNS_PEP_CONNECT_RESP: - pep_connresp_rcv(sk, skb); - break; -#endif - case PNS_PEP_DISCONNECT_REQ: pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); break; @@ -667,15 +687,19 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb) case PNS_PEP_ENABLE_REQ: case PNS_PEP_DISABLE_REQ: /* invalid handle is not 
even allowed here! */ - default: break; + + default: + if ((1 << sk->sk_state) + & ~(TCPF_CLOSE|TCPF_LISTEN|TCPF_CLOSE_WAIT)) + /* actively connected socket */ + return pipe_handler_do_rcv(sk, skb); } drop: kfree_skb(skb); return NET_RX_SUCCESS; } -#ifndef CONFIG_PHONET_PIPECTRLR static int pipe_do_remove(struct sock *sk) { struct pep_sock *pn = pep_sk(sk); @@ -693,7 +717,6 @@ static int pipe_do_remove(struct sock *sk) ph->data[0] = PAD; return pn_skb_send(sk, skb, NULL); } -#endif /* associated socket ceases to exist */ static void pep_sock_close(struct sock *sk, long timeout) @@ -706,13 +729,12 @@ static void pep_sock_close(struct sock *sk, long timeout) lock_sock(sk); if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED)) { -#ifndef CONFIG_PHONET_PIPECTRLR - /* Forcefully remove dangling Phonet pipe */ - pipe_do_remove(sk); -#else - /* send pep disconnect request */ - pipe_handler_request(sk, PNS_PEP_DISCONNECT_REQ, PAD, NULL, 0); -#endif + if (sk->sk_backlog_rcv == pipe_do_rcv) + /* Forcefully remove dangling Phonet pipe */ + pipe_do_remove(sk); + else + pipe_handler_request(sk, PNS_PEP_DISCONNECT_REQ, PAD, + NULL, 0); } sk->sk_state = TCP_CLOSE; @@ -844,20 +866,22 @@ drop: return newsk; } -#ifdef CONFIG_PHONET_PIPECTRLR static int pep_sock_connect(struct sock *sk, struct sockaddr *addr, int len) { struct pep_sock *pn = pep_sk(sk); - const struct sockaddr_pn *spn = (struct sockaddr_pn *)addr; + int err; u8 data[4] = { 0 /* sub-blocks */, PAD, PAD, PAD }; - pn->pn_sk.dobject = pn_sockaddr_get_object(spn); - pn->pn_sk.resource = pn_sockaddr_get_resource(spn); pn->pipe_handle = 1; /* anything but INVALID_HANDLE */ - return pipe_handler_request(sk, PNS_PEP_CONNECT_REQ, - PN_PIPE_DISABLE, data, 4); + err = pipe_handler_request(sk, PNS_PEP_CONNECT_REQ, + PN_PIPE_ENABLE, data, 4); + if (err) { + pn->pipe_handle = PN_PIPE_INVALID_HANDLE; + return err; + } + sk->sk_state = TCP_SYN_SENT; + return 0; } -#endif static int pep_ioctl(struct sock *sk, int cmd, unsigned 
long arg) { @@ -890,8 +914,16 @@ static int pep_init(struct sock *sk) sk->sk_destruct = pipe_destruct; INIT_HLIST_HEAD(&pn->hlist); + pn->listener = NULL; skb_queue_head_init(&pn->ctrlreq_queue); + atomic_set(&pn->tx_credits, 0); + pn->ifindex = 0; + pn->peer_type = 0; pn->pipe_handle = PN_PIPE_INVALID_HANDLE; + pn->rx_credits = 0; + pn->rx_fc = pn->tx_fc = PN_LEGACY_FLOW_CONTROL; + pn->init_enable = 1; + pn->aligned = 0; return 0; } @@ -1219,9 +1251,9 @@ static void pep_sock_unhash(struct sock *sk) lock_sock(sk); -#ifndef CONFIG_PHONET_PIPECTRLR - if ((1 << sk->sk_state) & ~(TCPF_CLOSE|TCPF_LISTEN)) { + if (pn->listener != NULL) { skparent = pn->listener; + pn->listener = NULL; release_sock(sk); pn = pep_sk(skparent); @@ -1229,7 +1261,7 @@ static void pep_sock_unhash(struct sock *sk) sk_del_node_init(sk); sk = skparent; } -#endif + /* Unhash a listening sock only when it is closed * and all of its active connected pipes are closed. */ if (hlist_empty(&pn->hlist)) @@ -1243,9 +1275,7 @@ static void pep_sock_unhash(struct sock *sk) static struct proto pep_proto = { .close = pep_sock_close, .accept = pep_sock_accept, -#ifdef CONFIG_PHONET_PIPECTRLR .connect = pep_sock_connect, -#endif .ioctl = pep_ioctl, .init = pep_init, .setsockopt = pep_setsockopt, diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 1eccfc3..b1adafa 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -225,15 +225,18 @@ static int pn_socket_autobind(struct socket *sock) return 0; /* socket was already bound */ } -#ifdef CONFIG_PHONET_PIPECTRLR static int pn_socket_connect(struct socket *sock, struct sockaddr *addr, int len, int flags) { struct sock *sk = sock->sk; + struct pn_sock *pn = pn_sk(sk); struct sockaddr_pn *spn = (struct sockaddr_pn *)addr; - long timeo; + struct task_struct *tsk = current; + long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); int err; + if (pn_socket_autobind(sock)) + return -ENOBUFS; if (len < sizeof(struct sockaddr_pn)) return -EINVAL; if 
(spn->spn_family != AF_PHONET) @@ -243,82 +246,61 @@ static int pn_socket_connect(struct socket *sock, struct sockaddr *addr, switch (sock->state) { case SS_UNCONNECTED: - sk->sk_state = TCP_CLOSE; - break; - case SS_CONNECTING: - switch (sk->sk_state) { - case TCP_SYN_RECV: - sock->state = SS_CONNECTED; - err = -EISCONN; - goto out; - case TCP_CLOSE: - err = -EALREADY; - if (flags & O_NONBLOCK) - goto out; - goto wait_connect; - } - break; - case SS_CONNECTED: - switch (sk->sk_state) { - case TCP_SYN_RECV: + if (sk->sk_state != TCP_CLOSE) { err = -EISCONN; goto out; - case TCP_CLOSE: - sock->state = SS_UNCONNECTED; - break; } break; - case SS_DISCONNECTING: - case SS_FREE: - break; + case SS_CONNECTING: + err = -EALREADY; + goto out; + default: + err = -EISCONN; + goto out; } - sk->sk_state = TCP_CLOSE; - sk_stream_kill_queues(sk); + pn->dobject = pn_sockaddr_get_object(spn); + pn->resource = pn_sockaddr_get_resource(spn); sock->state = SS_CONNECTING; + err = sk->sk_prot->connect(sk, addr, len); - if (err < 0) { + if (err) { sock->state = SS_UNCONNECTED; - sk->sk_state = TCP_CLOSE; + pn->dobject = 0; goto out; } - err = -EINPROGRESS; -wait_connect: - if (sk->sk_state != TCP_SYN_RECV && (flags & O_NONBLOCK)) - goto out; - - timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); - release_sock(sk); + while (sk->sk_state == TCP_SYN_SENT) { + DEFINE_WAIT(wait); - err = -ERESTARTSYS; - timeo = wait_event_interruptible_timeout(*sk_sleep(sk), - sk->sk_state != TCP_CLOSE, - timeo); - - lock_sock(sk); - if (timeo < 0) - goto out; /* -ERESTARTSYS */ - - err = -ETIMEDOUT; - if (timeo == 0 && sk->sk_state != TCP_SYN_RECV) - goto out; + if (!timeo) { + err = -EINPROGRESS; + goto out; + } + if (signal_pending(tsk)) { + err = sock_intr_errno(timeo); + goto out; + } - if (sk->sk_state != TCP_SYN_RECV) { - sock->state = SS_UNCONNECTED; - err = sock_error(sk); - if (!err) - err = -ECONNREFUSED; - goto out; + prepare_to_wait_exclusive(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); + 
release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); + finish_wait(sk_sleep(sk), &wait); } - sock->state = SS_CONNECTED; - err = 0; + if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED)) + err = 0; + else if (sk->sk_state == TCP_CLOSE_WAIT) + err = -ECONNRESET; + else + err = -ECONNREFUSED; + sock->state = err ? SS_UNCONNECTED : SS_CONNECTED; out: release_sock(sk); return err; } -#endif static int pn_socket_accept(struct socket *sock, struct socket *newsock, int flags) @@ -486,11 +468,7 @@ const struct proto_ops phonet_stream_ops = { .owner = THIS_MODULE, .release = pn_socket_release, .bind = pn_socket_bind, -#ifdef CONFIG_PHONET_PIPECTRLR .connect = pn_socket_connect, -#else - .connect = sock_no_connect, -#endif .socketpair = sock_no_socketpair, .accept = pn_socket_accept, .getname = pn_socket_getname, -- cgit v1.1 From a015f6f49968c330b236ca2f6c2170820414f922 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Tue, 8 Mar 2011 22:44:13 +0000 Subject: Phonet: kill the ST-Ericsson pipe controller Kconfig MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is now a run-time choice so that a single kernel can support both old and new generation ISI modems. Support for manually enabling the pipe flow is removed as it did not work properly, does not fit well with the socket API, and I am not aware of any use at the moment. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/Kconfig | 12 ------------ net/phonet/pep.c | 25 ------------------------- 2 files changed, 37 deletions(-) (limited to 'net') diff --git a/net/phonet/Kconfig b/net/phonet/Kconfig index 0d9b8a2..6ec7d55 100644 --- a/net/phonet/Kconfig +++ b/net/phonet/Kconfig @@ -14,15 +14,3 @@ config PHONET To compile this driver as a module, choose M here: the module will be called phonet. If unsure, say N. 
- -config PHONET_PIPECTRLR - bool "Phonet Pipe Controller (EXPERIMENTAL)" - depends on PHONET && EXPERIMENTAL - default N - help - The Pipe Controller implementation in Phonet stack to support Pipe - data with Nokia Slim modems like WG2.5 used on ST-Ericsson U8500 - platform. - - This option is incompatible with older Nokia modems. - Say N here unless you really know what you are doing. diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 671effb..68e635f 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -167,15 +167,6 @@ static int pipe_handler_send_created_ind(struct sock *sk) data, 4, GFP_ATOMIC); } -#ifdef CONFIG_PHONET_PIPECTRLR -static int pipe_handler_enable_pipe(struct sock *sk, int enable) -{ - u8 id = enable ? PNS_PEP_ENABLE_REQ : PNS_PEP_DISABLE_REQ; - - return pipe_handler_request(sk, id, PAD, NULL, 0); -} -#endif - static int pep_accept_conn(struct sock *sk, struct sk_buff *skb) { static const u8 data[20] = { @@ -968,16 +959,6 @@ static int pep_setsockopt(struct sock *sk, int level, int optname, } goto out_norel; -#ifdef CONFIG_PHONET_PIPECTRLR - case PNPIPE_ENABLE: - if ((1 << sk->sk_state) & ~(TCPF_SYN_RECV|TCPF_ESTABLISHED)) { - err = -ENOTCONN; - break; - } - err = pipe_handler_enable_pipe(sk, val); - break; -#endif - default: err = -ENOPROTOOPT; } @@ -1013,12 +994,6 @@ static int pep_getsockopt(struct sock *sk, int level, int optname, return -EINVAL; break; -#ifdef CONFIG_PHONET_PIPECTRLR - case PNPIPE_ENABLE: - val = sk->sk_state == TCP_ESTABLISHED; - break; -#endif - default: return -ENOPROTOOPT; } -- cgit v1.1 From 4d9d88d121fdd01dd859717909ea3c90173f143a Mon Sep 17 00:00:00 2001 From: Scott James Remnant Date: Tue, 8 Mar 2011 10:45:30 -0800 Subject: net/wireless: add COUNTRY to to regulatory device uevent Regulatory devices issue change uevents to inform userspace of a need to call the crda tool; however these can often be sent before udevd is running, and were not previously included in the results of udevadm trigger (which requests a 
new change event using the /uevent attribute of the sysfs object). Add a uevent function to the device type which includes the COUNTRY information from the last request if it has yet to be processed, the case of multiple requests is already handled in the code by checking whether an unprocessed one is queued in the same manner and refusing to queue a new one. The existing udev rule continues to work as before. Signed-off-by: Scott James Remnant Acked-By: Kay Sievers Acked-by: Greg Kroah-Hartman Signed-off-by: John W. Linville --- net/wireless/reg.c | 39 ++++++++++++++++++++++++++++----------- net/wireless/reg.h | 1 + 2 files changed, 29 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index c565689..3332d5b 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -63,6 +63,10 @@ static struct regulatory_request *last_request; /* To trigger userspace events */ static struct platform_device *reg_pdev; +static struct device_type reg_device_type = { + .uevent = reg_device_uevent, +}; + /* * Central wireless core regulatory domains, we only need two, * the current one and a world regulatory domain in case we have no @@ -362,16 +366,11 @@ static inline void reg_regdb_query(const char *alpha2) {} /* * This lets us keep regulatory code which is updated on a regulatory - * basis in userspace. + * basis in userspace. 
Country information is filled in by + * reg_device_uevent */ static int call_crda(const char *alpha2) { - char country_env[9 + 2] = "COUNTRY="; - char *envp[] = { - country_env, - NULL - }; - if (!is_world_regdom((char *) alpha2)) pr_info("Calling CRDA for country: %c%c\n", alpha2[0], alpha2[1]); @@ -381,10 +380,7 @@ static int call_crda(const char *alpha2) /* query internal regulatory database (if it exists) */ reg_regdb_query(alpha2); - country_env[8] = alpha2[0]; - country_env[9] = alpha2[1]; - - return kobject_uevent_env(&reg_pdev->dev.kobj, KOBJ_CHANGE, envp); + return kobject_uevent(&reg_pdev->dev.kobj, KOBJ_CHANGE); } /* Used by nl80211 before kmalloc'ing our regulatory domain */ @@ -2087,6 +2083,25 @@ int set_regdom(const struct ieee80211_regdomain *rd) return r; } +#ifdef CONFIG_HOTPLUG +int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + if (last_request && !last_request->processed) { + if (add_uevent_var(env, "COUNTRY=%c%c", + last_request->alpha2[0], + last_request->alpha2[1])) + return -ENOMEM; + } + + return 0; +} +#else +int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + return -ENODEV; +} +#endif /* CONFIG_HOTPLUG */ + /* Caller must hold cfg80211_mutex */ void reg_device_remove(struct wiphy *wiphy) { @@ -2118,6 +2133,8 @@ int __init regulatory_init(void) if (IS_ERR(reg_pdev)) return PTR_ERR(reg_pdev); + reg_pdev->dev.type = &reg_device_type; + spin_lock_init(&reg_requests_lock); spin_lock_init(&reg_pending_beacons_lock); diff --git a/net/wireless/reg.h b/net/wireless/reg.h index c4695d0..b67d1c3 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -8,6 +8,7 @@ bool reg_is_valid_request(const char *alpha2); int regulatory_hint_user(const char *alpha2); +int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env); void reg_device_remove(struct wiphy *wiphy); int __init regulatory_init(void); -- cgit v1.1 From 8d5eab5aa676378b4c9daa62d10d08a0bca04677 Mon Sep 17 00:00:00 2001 From: Daniel Halperin
Date: Wed, 9 Mar 2011 03:10:18 -0800 Subject: mac80211: update minstrel_ht sample rate when probe is set Waiting until the status is received can cause the same rate to be probed multiple times consecutively. Cc: Felix Fietkau Signed-off-by: Daniel Halperin Signed-off-by: John W. Linville --- net/mac80211/rc80211_minstrel_ht.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 775cf15..bce14fb 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -415,10 +415,8 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, mi->sample_count--; } - if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) { + if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) mi->sample_packets += info->status.ampdu_len; - minstrel_next_sample_idx(mi); - } for (i = 0; !last; i++) { last = (i == IEEE80211_TX_MAX_RATES - 1) || @@ -551,13 +549,14 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) sample_idx = sample_table[mg->column][mg->index]; mr = &mg->rates[sample_idx]; sample_idx += mi->sample_group * MCS_GROUP_RATES; + minstrel_next_sample_idx(mi); /* * When not using MRR, do not sample if the probability is already * higher than 95% to avoid wasting airtime */ if (!mp->has_mrr && (mr->probability > MINSTREL_FRAC(95, 100))) - goto next; + return -1; /* * Make sure that lower rates get sampled only occasionally, @@ -566,17 +565,13 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) if (minstrel_get_duration(sample_idx) > minstrel_get_duration(mi->max_tp_rate)) { if (mr->sample_skipped < 20) - goto next; + return -1; if (mi->sample_slow++ > 2) - goto next; + return -1; } return sample_idx; - -next: - minstrel_next_sample_idx(mi); - return -1; } static void -- cgit v1.1 From ee3f1aaf930b7cfbf3d34eff1e5e076393227e90 Mon Sep 17 00:00:00 2001 From: 
"David S. Miller" Date: Wed, 9 Mar 2011 14:06:20 -0800 Subject: ipv4: Lookup multicast routes by rtable using helper. Create a common helper for this operation, since we do it identically in three spots. Suggested by Eric Dumazet. Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 70 +++++++++++++++++++++++---------------------------------- 1 file changed, 28 insertions(+), 42 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9d5f634..74909ba 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1793,6 +1793,24 @@ dont_forward: return 0; } +static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct rtable *rt) +{ + struct flowi fl = { + .fl4_dst = rt->rt_key_dst, + .fl4_src = rt->rt_key_src, + .fl4_tos = rt->rt_tos, + .oif = rt->rt_oif, + .iif = rt->rt_iif, + .mark = rt->rt_mark, + }; + struct mr_table *mrt; + int err; + + err = ipmr_fib_lookup(net, &fl, &mrt); + if (err) + return ERR_PTR(err); + return mrt; +} /* * Multicast packets for forwarding arrive here @@ -1805,7 +1823,6 @@ int ip_mr_input(struct sk_buff *skb) struct net *net = dev_net(skb->dev); int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; struct mr_table *mrt; - int err; /* Packet is looped back after forward, it should not be * forwarded second time, but still can be delivered locally. 
@@ -1813,21 +1830,10 @@ int ip_mr_input(struct sk_buff *skb) if (IPCB(skb)->flags & IPSKB_FORWARDED) goto dont_forward; - { - struct rtable *rt = skb_rtable(skb); - struct flowi fl = { - .fl4_dst = rt->rt_key_dst, - .fl4_src = rt->rt_key_src, - .fl4_tos = rt->rt_tos, - .oif = rt->rt_oif, - .iif = rt->rt_iif, - .mark = rt->rt_mark, - }; - err = ipmr_fib_lookup(net, &fl, &mrt); - if (err < 0) { - kfree_skb(skb); - return err; - } + mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); + if (IS_ERR(mrt)) { + kfree_skb(skb); + return PTR_ERR(mrt); } if (!local) { if (IPCB(skb)->opt.router_alert) { @@ -1956,19 +1962,9 @@ int pim_rcv_v1(struct sk_buff *skb) pim = igmp_hdr(skb); - { - struct rtable *rt = skb_rtable(skb); - struct flowi fl = { - .fl4_dst = rt->rt_key_dst, - .fl4_src = rt->rt_key_src, - .fl4_tos = rt->rt_tos, - .oif = rt->rt_oif, - .iif = rt->rt_iif, - .mark = rt->rt_mark, - }; - if (ipmr_fib_lookup(net, &fl, &mrt) < 0) - goto drop; - } + mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); + if (IS_ERR(mrt)) + goto drop; if (!mrt->mroute_do_pim || pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) goto drop; @@ -1998,19 +1994,9 @@ static int pim_rcv(struct sk_buff *skb) csum_fold(skb_checksum(skb, 0, skb->len, 0)))) goto drop; - { - struct rtable *rt = skb_rtable(skb); - struct flowi fl = { - .fl4_dst = rt->rt_key_dst, - .fl4_src = rt->rt_key_src, - .fl4_tos = rt->rt_tos, - .oif = rt->rt_oif, - .iif = rt->rt_iif, - .mark = rt->rt_mark, - }; - if (ipmr_fib_lookup(net, &fl, &mrt) < 0) - goto drop; - } + mrt = ipmr_rt_fib_lookup(net, skb_rtable(skb)); + if (IS_ERR(mrt)) + goto drop; if (__pim_rcv(mrt, skb, sizeof(*pim))) { drop: kfree_skb(skb); -- cgit v1.1 From 2f4e1b3970973bbb57cc3a3b9d67e67c1c648c37 Mon Sep 17 00:00:00 2001 From: Mario Schuknecht Date: Wed, 9 Mar 2011 14:08:09 -0800 Subject: tcp: ioctl type SIOCOUTQNSD returns amount of data not sent In contrast to SIOCOUTQ which returns the amount of data sent but not yet acknowledged plus data not yet 
sent, this patch only returns the data not sent. For various methods of live streaming bitrate control it may be helpful to know how much data in the tcp outqueue is not sent yet. Signed-off-by: Mario Schuknecht Signed-off-by: Steffen Sledz Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a17a5a7..b22d450 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -505,6 +505,15 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) else answ = tp->write_seq - tp->snd_una; break; + case SIOCOUTQNSD: + if (sk->sk_state == TCP_LISTEN) + return -EINVAL; + + if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) + answ = 0; + else + answ = tp->write_seq - tp->snd_nxt; + break; default: return -ENOIOCTLCMD; } -- cgit v1.1 From 67e28ffd864eebbaf48b404d0a8cb4edd2bdc924 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 9 Mar 2011 20:42:07 -0800 Subject: ipv4: Optimize flow initialization in input route lookup. Like in commit 44713b67db10c774f14280c129b0d5fd13c70cf2 ("ipv4: Optimize flow initialization in output route lookup.") we can optimize the on-stack flow setup to only initialize the members which are actually used. Otherwise we bzero the entire structure, then initialize explicitly the first half of it. Signed-off-by: David S.
Miller --- net/ipv4/route.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 92a24ea..ac32d8f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2081,12 +2081,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, { struct fib_result res; struct in_device *in_dev = __in_dev_get_rcu(dev); - struct flowi fl = { .fl4_dst = daddr, - .fl4_src = saddr, - .fl4_tos = tos, - .fl4_scope = RT_SCOPE_UNIVERSE, - .mark = skb->mark, - .iif = dev->ifindex }; + struct flowi fl; unsigned flags = 0; u32 itag = 0; struct rtable * rth; @@ -2123,6 +2118,13 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, /* * Now we are ready to route packet. */ + fl.oif = 0; + fl.iif = dev->ifindex; + fl.mark = skb->mark; + fl.fl4_dst = daddr; + fl.fl4_src = saddr; + fl.fl4_tos = tos; + fl.fl4_scope = RT_SCOPE_UNIVERSE; err = fib_lookup(net, &fl, &res); if (err != 0) { if (!IN_DEV_FORWARD(in_dev)) -- cgit v1.1 From cc7e17ea0427a5df319e43606a3d6c53b13a6e9c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 9 Mar 2011 20:57:50 -0800 Subject: ipv4: Optimize flow initialization in fib_validate_source(). Like in commit 44713b67db10c774f14280c129b0d5fd13c70cf2 ("ipv4: Optimize flow initialization in output route lookup.") we can optimize the on-stack flow setup to only initialize the members which are actually used. Otherwise we bzero the entire structure, then initialize explicitly the first half of it. Signed-off-by: David S.
Miller --- net/ipv4/fib_frontend.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 1d2233c..fe10bcd 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -193,19 +193,21 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, u32 *itag, u32 mark) { struct in_device *in_dev; - struct flowi fl = { - .fl4_dst = src, - .fl4_src = dst, - .fl4_tos = tos, - .mark = mark, - .iif = oif - }; + struct flowi fl; struct fib_result res; int no_addr, rpf, accept_local; bool dev_match; int ret; struct net *net; + fl.oif = 0; + fl.iif = oif; + fl.mark = mark; + fl.fl4_dst = src; + fl.fl4_src = dst; + fl.fl4_tos = tos; + fl.fl4_scope = RT_SCOPE_UNIVERSE; + no_addr = rpf = accept_local = 0; in_dev = __in_dev_get_rcu(dev); if (in_dev) { -- cgit v1.1 From a252bebe22155313ccdadc20b79f67a239dc9ecb Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 10 Mar 2011 00:40:17 -0800 Subject: tcp: mark tcp_congestion_ops read_mostly Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/ipv4/tcp_bic.c | 2 +- net/ipv4/tcp_cubic.c | 2 +- net/ipv4/tcp_highspeed.c | 2 +- net/ipv4/tcp_htcp.c | 2 +- net/ipv4/tcp_hybla.c | 2 +- net/ipv4/tcp_illinois.c | 2 +- net/ipv4/tcp_lp.c | 2 +- net/ipv4/tcp_scalable.c | 2 +- net/ipv4/tcp_vegas.c | 2 +- net/ipv4/tcp_veno.c | 2 +- net/ipv4/tcp_westwood.c | 2 +- net/ipv4/tcp_yeah.c | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 3b53fd1..6187eb4 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -209,7 +209,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt) } -static struct tcp_congestion_ops bictcp = { +static struct tcp_congestion_ops bictcp __read_mostly = { .init = bictcp_init, .ssthresh = bictcp_recalc_ssthresh, .cong_avoid = bictcp_cong_avoid, diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 71d5f2f..62f775c 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -405,7 +405,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) hystart_update(sk, delay); } -static struct tcp_congestion_ops cubictcp = { +static struct tcp_congestion_ops cubictcp __read_mostly = { .init = bictcp_init, .ssthresh = bictcp_recalc_ssthresh, .cong_avoid = bictcp_cong_avoid, diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 8b6caaf..30f27f6 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -158,7 +158,7 @@ static u32 hstcp_ssthresh(struct sock *sk) } -static struct tcp_congestion_ops tcp_highspeed = { +static struct tcp_congestion_ops tcp_highspeed __read_mostly = { .init = hstcp_init, .ssthresh = hstcp_ssthresh, .cong_avoid = hstcp_cong_avoid, diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 7c94a49..c1a8175 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -284,7 +284,7 @@ static void htcp_state(struct sock *sk, u8 new_state) } } -static struct tcp_congestion_ops htcp = { +static struct tcp_congestion_ops htcp 
__read_mostly = { .init = htcp_init, .ssthresh = htcp_recalc_ssthresh, .cong_avoid = htcp_cong_avoid, diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index 377bc93..fe3ecf4 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -162,7 +162,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp); } -static struct tcp_congestion_ops tcp_hybla = { +static struct tcp_congestion_ops tcp_hybla __read_mostly = { .init = hybla_init, .ssthresh = tcp_reno_ssthresh, .min_cwnd = tcp_reno_min_cwnd, diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 00ca688..813b43a 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -322,7 +322,7 @@ static void tcp_illinois_info(struct sock *sk, u32 ext, } } -static struct tcp_congestion_ops tcp_illinois = { +static struct tcp_congestion_ops tcp_illinois __read_mostly = { .flags = TCP_CONG_RTT_STAMP, .init = tcp_illinois_init, .ssthresh = tcp_illinois_ssthresh, diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index de87037..656d431 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -313,7 +313,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, s32 rtt_us) lp->last_drop = tcp_time_stamp; } -static struct tcp_congestion_ops tcp_lp = { +static struct tcp_congestion_ops tcp_lp __read_mostly = { .flags = TCP_CONG_RTT_STAMP, .init = tcp_lp_init, .ssthresh = tcp_reno_ssthresh, diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index a765137..8ce55b8 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -35,7 +35,7 @@ static u32 tcp_scalable_ssthresh(struct sock *sk) } -static struct tcp_congestion_ops tcp_scalable = { +static struct tcp_congestion_ops tcp_scalable __read_mostly = { .ssthresh = tcp_scalable_ssthresh, .cong_avoid = tcp_scalable_cong_avoid, .min_cwnd = tcp_reno_min_cwnd, diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index c6743ee..80fa2bf 
100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -304,7 +304,7 @@ void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(tcp_vegas_get_info); -static struct tcp_congestion_ops tcp_vegas = { +static struct tcp_congestion_ops tcp_vegas __read_mostly = { .flags = TCP_CONG_RTT_STAMP, .init = tcp_vegas_init, .ssthresh = tcp_reno_ssthresh, diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 38bc0b5..ac43cd7 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -201,7 +201,7 @@ static u32 tcp_veno_ssthresh(struct sock *sk) return max(tp->snd_cwnd >> 1U, 2U); } -static struct tcp_congestion_ops tcp_veno = { +static struct tcp_congestion_ops tcp_veno __read_mostly = { .flags = TCP_CONG_RTT_STAMP, .init = tcp_veno_init, .ssthresh = tcp_veno_ssthresh, diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index a534dda..1b91bf4 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -272,7 +272,7 @@ static void tcp_westwood_info(struct sock *sk, u32 ext, } -static struct tcp_congestion_ops tcp_westwood = { +static struct tcp_congestion_ops tcp_westwood __read_mostly = { .init = tcp_westwood_init, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_reno_cong_avoid, diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index a0f2403..dc7f431 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -225,7 +225,7 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) { return tp->snd_cwnd - reduction; } -static struct tcp_congestion_ops tcp_yeah = { +static struct tcp_congestion_ops tcp_yeah __read_mostly = { .flags = TCP_CONG_RTT_STAMP, .init = tcp_yeah_init, .ssthresh = tcp_yeah_ssthresh, -- cgit v1.1 From dbdd9a52e38a4a93adfa4d0278801cce4fad98eb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 10 Mar 2011 16:34:38 -0800 Subject: ipv4: Remove redundant RCU locking in ip_check_mc(). All callers are under rcu_read_lock() protection already. 
Rename to ip_check_mc_rcu() to make it even more clear. Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 5 ++--- net/ipv4/route.c | 8 ++++---- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 44ba906..12b65cc 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2333,13 +2333,13 @@ void ip_mc_drop_socket(struct sock *sk) rtnl_unlock(); } -int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) +/* called with rcu_read_lock() */ +int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) { struct ip_mc_list *im; struct ip_sf_list *psf; int rv = 0; - rcu_read_lock(); for_each_pmc_rcu(in_dev, im) { if (im->multiaddr == mc_addr) break; @@ -2361,7 +2361,6 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p } else rv = 1; /* unspecified source; tentatively allow */ } - rcu_read_unlock(); return rv; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ac32d8f..f6730d9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2309,8 +2309,8 @@ skip_cache: struct in_device *in_dev = __in_dev_get_rcu(dev); if (in_dev) { - int our = ip_check_mc(in_dev, daddr, saddr, - ip_hdr(skb)->protocol); + int our = ip_check_mc_rcu(in_dev, daddr, saddr, + ip_hdr(skb)->protocol); if (our #ifdef CONFIG_IP_MROUTE || @@ -2368,8 +2368,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res, fi = NULL; } else if (type == RTN_MULTICAST) { flags |= RTCF_MULTICAST | RTCF_LOCAL; - if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src, - oldflp->proto)) + if (!ip_check_mc_rcu(in_dev, oldflp->fl4_dst, oldflp->fl4_src, + oldflp->proto)) flags &= ~RTCF_LOCAL; /* If multicast route do not exist use * default one, but do not gateway in this case. -- cgit v1.1 From ff3fccb3d05756ec9fb44350b059d31a5e8e0fdc Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Thu, 10 Mar 2011 16:23:24 -0800 Subject: ipv4: Remove unnecessary test from ip_mkroute_input() fl->oif will always be zero on the input path, so there is no reason to test for that. Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f6730d9..a7ac90c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2047,7 +2047,7 @@ static int ip_mkroute_input(struct sk_buff *skb, unsigned hash; #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0) + if (res->fi && res->fi->fib_nhs > 1) fib_select_multipath(fl, res); #endif -- cgit v1.1 From 1b7fe59322bef9e7a2c05b64a07a66b875299736 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 10 Mar 2011 17:01:16 -0800 Subject: ipv4: Kill flowi arg to fib_select_multipath() Completely unused. Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 2 +- net/ipv4/route.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index d73d758..b5d523b 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1210,7 +1210,7 @@ int fib_sync_up(struct net_device *dev) * The algorithm is suboptimal, but it provides really * fair weighted route distribution. 
*/ -void fib_select_multipath(const struct flowi *flp, struct fib_result *res) +void fib_select_multipath(struct fib_result *res) { struct fib_info *fi = res->fi; int w; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a7ac90c..9c17e32 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2048,7 +2048,7 @@ static int ip_mkroute_input(struct sk_buff *skb, #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res->fi && res->fi->fib_nhs > 1) - fib_select_multipath(fl, res); + fib_select_multipath(res); #endif /* create a routing cache entry */ @@ -2598,7 +2598,7 @@ static struct rtable *ip_route_output_slow(struct net *net, #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res.fi->fib_nhs > 1 && fl.oif == 0) - fib_select_multipath(&fl, &res); + fib_select_multipath(&res); else #endif if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif) -- cgit v1.1 From 808118cb41dfe12a1ac0e35515ac4d91b170bdf9 Mon Sep 17 00:00:00 2001 From: Jason Young Date: Thu, 10 Mar 2011 16:43:19 -0800 Subject: mac80211: do not enable ps if 802.1x controlled port is unblocked If dynamic_ps is disabled, enabling power save before the 4-way handshake completes may delay the station from being authorized to send/receive traffic, i.e. increase roaming times. It also may result in a failed 4-way handshake depending on the AP's timing requirements and beacon interval, and the station's listen interval. To fix this, prevent power save from being enabled while the station isn't authorized and recalculate power save whenever the station's authorized state changes. Signed-off-by: Jason Young Acked-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/cfg.c | 4 ++++ net/mac80211/mlme.c | 37 ++++++++++++++++++++++++++++++++----- 2 files changed, 36 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 7b701dc..11866b4 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -834,6 +834,10 @@ static int ieee80211_change_station(struct wiphy *wiphy, rcu_read_unlock(); + if (sdata->vif.type == NL80211_IFTYPE_STATION && + params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) + ieee80211_recalc_ps(local, -1); + return 0; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index cc984bd..64d92d5 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -613,6 +613,37 @@ static void ieee80211_change_ps(struct ieee80211_local *local) } } +static bool ieee80211_powersave_allowed(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_managed *mgd = &sdata->u.mgd; + struct sta_info *sta = NULL; + u32 sta_flags = 0; + + if (!mgd->powersave) + return false; + + if (!mgd->associated) + return false; + + if (!mgd->associated->beacon_ies) + return false; + + if (mgd->flags & (IEEE80211_STA_BEACON_POLL | + IEEE80211_STA_CONNECTION_POLL)) + return false; + + rcu_read_lock(); + sta = sta_info_get(sdata, mgd->bssid); + if (sta) + sta_flags = get_sta_flags(sta); + rcu_read_unlock(); + + if (!(sta_flags & WLAN_STA_AUTHORIZED)) + return false; + + return true; +} + /* need to hold RTNL or interface lock */ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) { @@ -647,11 +678,7 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) count++; } - if (count == 1 && found->u.mgd.powersave && - found->u.mgd.associated && - found->u.mgd.associated->beacon_ies && - !(found->u.mgd.flags & (IEEE80211_STA_BEACON_POLL | - IEEE80211_STA_CONNECTION_POLL))) { + if (count == 1 && ieee80211_powersave_allowed(found)) { struct ieee80211_conf *conf = &local->hw.conf; s32 beaconint_us; -- cgit v1.1 From 
3677713b799155c96637cdef3fa025e42f3fcf48 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Mon, 7 Mar 2011 16:17:59 -0500 Subject: wireless: add support for ethtool_ops->{get,set}_ringparam Signed-off-by: John W. Linville --- net/wireless/ethtool.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'net') diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c index ca4c825..9bde4d1 100644 --- a/net/wireless/ethtool.c +++ b/net/wireless/ethtool.c @@ -1,5 +1,6 @@ #include #include +#include "core.h" #include "ethtool.h" static void cfg80211_get_drvinfo(struct net_device *dev, @@ -37,9 +38,41 @@ static void cfg80211_get_regs(struct net_device *dev, struct ethtool_regs *regs, regs->len = 0; } +static void cfg80211_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *rp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + + memset(rp, 0, sizeof(*rp)); + + if (rdev->ops->get_ringparam) + rdev->ops->get_ringparam(wdev->wiphy, + &rp->tx_pending, &rp->tx_max_pending, + &rp->rx_pending, &rp->rx_max_pending); +} + +static int cfg80211_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *rp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + + if (rp->rx_mini_pending != 0 || rp->rx_jumbo_pending != 0) + return -EINVAL; + + if (rdev->ops->set_ringparam) + return rdev->ops->set_ringparam(wdev->wiphy, + rp->tx_pending, rp->rx_pending); + + return -ENOTSUPP; +} + const struct ethtool_ops cfg80211_ethtool_ops = { .get_drvinfo = cfg80211_get_drvinfo, .get_regs_len = cfg80211_get_regs_len, .get_regs = cfg80211_get_regs, .get_link = ethtool_op_get_link, + .get_ringparam = cfg80211_get_ringparam, + .set_ringparam = cfg80211_set_ringparam, }; -- cgit v1.1 From 38c091590f6ed78fcaf114c14ce133e5b3f717e6 Mon Sep 17 00:00:00 2001 From: "John W. 
Linville" Date: Mon, 7 Mar 2011 16:19:18 -0500 Subject: mac80211: implement support for cfg80211_ops->{get,set}_ringparam Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 17 +++++++++++++++ net/mac80211/driver-ops.h | 26 +++++++++++++++++++++++ net/mac80211/driver-trace.h | 52 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 95 insertions(+) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 11866b4..3342135 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2012,6 +2012,21 @@ static int ieee80211_get_antenna(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant) return drv_get_antenna(local, tx_ant, rx_ant); } +static int ieee80211_set_ringparam(struct wiphy *wiphy, u32 tx, u32 rx) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + + return drv_set_ringparam(local, tx, rx); +} + +static void ieee80211_get_ringparam(struct wiphy *wiphy, + u32 *tx, u32 *tx_max, u32 *rx, u32 *rx_max) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + + drv_get_ringparam(local, tx, tx_max, rx, rx_max); +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -2069,4 +2084,6 @@ struct cfg80211_ops mac80211_config_ops = { .mgmt_frame_register = ieee80211_mgmt_frame_register, .set_antenna = ieee80211_set_antenna, .get_antenna = ieee80211_get_antenna, + .set_ringparam = ieee80211_set_ringparam, + .get_ringparam = ieee80211_get_ringparam, }; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 3729296..9c0d62b 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -526,4 +526,30 @@ static inline int drv_offchannel_tx_cancel_wait(struct ieee80211_local *local) return ret; } +static inline int drv_set_ringparam(struct ieee80211_local *local, + u32 tx, u32 rx) +{ + int ret = -ENOTSUPP; + + might_sleep(); + + trace_drv_set_ringparam(local, tx, rx); + if (local->ops->set_ringparam) + ret = 
local->ops->set_ringparam(&local->hw, tx, rx); + trace_drv_return_int(local, ret); + + return ret; +} + +static inline void drv_get_ringparam(struct ieee80211_local *local, + u32 *tx, u32 *tx_max, u32 *rx, u32 *rx_max) +{ + might_sleep(); + + trace_drv_get_ringparam(local, tx, tx_max, rx, rx_max); + if (local->ops->get_ringparam) + local->ops->get_ringparam(&local->hw, tx, tx_max, rx, rx_max); + trace_drv_return_void(local); +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 520fe24..45aab80 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -912,6 +912,58 @@ TRACE_EVENT(drv_offchannel_tx, ) ); +TRACE_EVENT(drv_set_ringparam, + TP_PROTO(struct ieee80211_local *local, u32 tx, u32 rx), + + TP_ARGS(local, tx, rx), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u32, tx) + __field(u32, rx) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->tx = tx; + __entry->rx = rx; + ), + + TP_printk( + LOCAL_PR_FMT " tx:%d rx %d", + LOCAL_PR_ARG, __entry->tx, __entry->rx + ) +); + +TRACE_EVENT(drv_get_ringparam, + TP_PROTO(struct ieee80211_local *local, u32 *tx, u32 *tx_max, + u32 *rx, u32 *rx_max), + + TP_ARGS(local, tx, tx_max, rx, rx_max), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u32, tx) + __field(u32, tx_max) + __field(u32, rx) + __field(u32, rx_max) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->tx = *tx; + __entry->tx_max = *tx_max; + __entry->rx = *rx; + __entry->rx_max = *rx_max; + ), + + TP_printk( + LOCAL_PR_FMT " tx:%d tx_max %d rx %d rx_max %d", + LOCAL_PR_ARG, + __entry->tx, __entry->tx_max, __entry->rx, __entry->rx_max + ) +); + DEFINE_EVENT(local_only_evt, drv_offchannel_tx_cancel_wait, TP_PROTO(struct ieee80211_local *local), TP_ARGS(local) -- cgit v1.1 From 78fbfd8a653ca972afe479517a40661bfff6d8c3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 12 Mar 2011 00:00:52 -0500 Subject: ipv4: Create and use route lookup helpers. 
The idea here is this minimizes the number of places one has to edit in order to make changes to how flows are defined and used. Signed-off-by: David S. Miller --- net/atm/clip.c | 4 +--- net/bridge/br_netfilter.c | 7 ++---- net/ipv4/af_inet.c | 20 ++++------------- net/ipv4/arp.c | 12 +++------- net/ipv4/igmp.c | 34 +++++++++++++--------------- net/ipv4/ip_gre.c | 49 ++++++++++++++--------------------------- net/ipv4/ip_output.c | 33 +++++++++++---------------- net/ipv4/ipip.c | 36 ++++++++++++------------------ net/ipv4/ipmr.c | 24 ++++++++------------ net/ipv6/ip6_tunnel.c | 19 ++++++++-------- net/ipv6/sit.c | 31 ++++++++++++-------------- net/l2tp/l2tp_ip.c | 30 +++++++++---------------- net/netfilter/ipvs/ip_vs_xmit.c | 14 ++---------- net/rxrpc/ar-peer.c | 23 ++++--------------- 14 files changed, 118 insertions(+), 218 deletions(-) (limited to 'net') diff --git a/net/atm/clip.c b/net/atm/clip.c index 810a129..1d4be60 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -502,8 +502,6 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip) struct atmarp_entry *entry; int error; struct clip_vcc *clip_vcc; - struct flowi fl = { .fl4_dst = ip, - .fl4_tos = 1 }; struct rtable *rt; if (vcc->push != clip_push) { @@ -520,7 +518,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip) unlink_clip_vcc(clip_vcc); return 0; } - rt = ip_route_output_key(&init_net, &fl); + rt = ip_route_output(&init_net, ip, 0, 1, 0); if (IS_ERR(rt)) return PTR_ERR(rt); neigh = __neigh_lookup(&clip_tbl, &ip, rt->dst.dev, 1); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 45b57b1..f97af559 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -412,10 +412,6 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; if (dnat_took_place(skb)) { if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { - struct flowi fl = { - .fl4_dst = iph->daddr, - .fl4_tos = RT_TOS(iph->tos), 
- }; struct in_device *in_dev = __in_dev_get_rcu(dev); /* If err equals -EHOSTUNREACH the error is due to a @@ -428,7 +424,8 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) goto free_skb; - rt = ip_route_output_key(dev_net(dev), &fl); + rt = ip_route_output(dev_net(dev), iph->daddr, 0, + RT_TOS(iph->tos), 0); if (!IS_ERR(rt)) { /* - Bridged-and-DNAT'ed traffic doesn't * require ip_forwarding. */ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 35a5020..807d83c 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1157,22 +1157,10 @@ int inet_sk_rebuild_header(struct sock *sk) daddr = inet->inet_daddr; if (inet->opt && inet->opt->srr) daddr = inet->opt->faddr; - { - struct flowi fl = { - .oif = sk->sk_bound_dev_if, - .mark = sk->sk_mark, - .fl4_dst = daddr, - .fl4_src = inet->inet_saddr, - .fl4_tos = RT_CONN_FLAGS(sk), - .proto = sk->sk_protocol, - .flags = inet_sk_flowi_flags(sk), - .fl_ip_sport = inet->inet_sport, - .fl_ip_dport = inet->inet_dport, - }; - - security_sk_classify_flow(sk, &fl); - rt = ip_route_output_flow(sock_net(sk), &fl, sk); - } + rt = ip_route_output_ports(sock_net(sk), sk, daddr, inet->inet_saddr, + inet->inet_dport, inet->inet_sport, + sk->sk_protocol, RT_CONN_FLAGS(sk), + sk->sk_bound_dev_if); if (!IS_ERR(rt)) { err = 0; sk_setup_caps(sk, &rt->dst); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index fa9988d..090d273 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -433,14 +433,12 @@ static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip) static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) { - struct flowi fl = { .fl4_dst = sip, - .fl4_src = tip }; struct rtable *rt; int flag = 0; /*unsigned long now; */ struct net *net = dev_net(dev); - rt = ip_route_output_key(net, &fl); + rt = ip_route_output(net, sip, tip, 0, 0); if (IS_ERR(rt)) return 1; if (rt->dst.dev != dev) { @@ -1062,9 +1060,7 @@ static int 
arp_req_set(struct net *net, struct arpreq *r, if (r->arp_flags & ATF_PERM) r->arp_flags |= ATF_COM; if (dev == NULL) { - struct flowi fl = { .fl4_dst = ip, - .fl4_tos = RTO_ONLINK }; - struct rtable *rt = ip_route_output_key(net, &fl); + struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -1185,9 +1181,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r, ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; if (dev == NULL) { - struct flowi fl = { .fl4_dst = ip, - .fl4_tos = RTO_ONLINK }; - struct rtable *rt = ip_route_output_key(net, &fl); + struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0); if (IS_ERR(rt)) return PTR_ERR(rt); dev = rt->dst.dev; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 12b65cc..1fd3d9c 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -321,15 +321,12 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) } igmp_skb_size(skb) = size; - { - struct flowi fl = { .oif = dev->ifindex, - .fl4_dst = IGMPV3_ALL_MCR, - .proto = IPPROTO_IGMP }; - rt = ip_route_output_key(net, &fl); - if (IS_ERR(rt)) { - kfree_skb(skb); - return NULL; - } + rt = ip_route_output_ports(net, NULL, IGMPV3_ALL_MCR, 0, + 0, 0, + IPPROTO_IGMP, 0, dev->ifindex); + if (IS_ERR(rt)) { + kfree_skb(skb); + return NULL; } if (rt->rt_src == 0) { kfree_skb(skb); @@ -667,14 +664,12 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, else dst = group; - { - struct flowi fl = { .oif = dev->ifindex, - .fl4_dst = dst, - .proto = IPPROTO_IGMP }; - rt = ip_route_output_key(net, &fl); - if (IS_ERR(rt)) - return -1; - } + rt = ip_route_output_ports(net, NULL, dst, 0, + 0, 0, + IPPROTO_IGMP, 0, dev->ifindex); + if (IS_ERR(rt)) + return -1; + if (rt->rt_src == 0) { ip_rt_put(rt); return -1; @@ -1441,7 +1436,6 @@ void ip_mc_destroy_dev(struct in_device *in_dev) /* RTNL is locked */ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) 
{ - struct flowi fl = { .fl4_dst = imr->imr_multiaddr.s_addr }; struct net_device *dev = NULL; struct in_device *idev = NULL; @@ -1456,7 +1450,9 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) } if (!dev) { - struct rtable *rt = ip_route_output_key(net, &fl); + struct rtable *rt = ip_route_output(net, + imr->imr_multiaddr.s_addr, + 0, 0, 0); if (!IS_ERR(rt)) { dev = rt->dst.dev; ip_rt_put(rt); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 7146595..da5941f 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -769,20 +769,12 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph); } - { - struct flowi fl = { - .oif = tunnel->parms.link, - .fl4_dst = dst, - .fl4_src = tiph->saddr, - .fl4_tos = RT_TOS(tos), - .proto = IPPROTO_GRE, - .fl_gre_key = tunnel->parms.o_key - }; - rt = ip_route_output_key(dev_net(dev), &fl); - if (IS_ERR(rt)) { - dev->stats.tx_carrier_errors++; - goto tx_error; - } + rt = ip_route_output_gre(dev_net(dev), dst, tiph->saddr, + tunnel->parms.o_key, RT_TOS(tos), + tunnel->parms.link); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error; } tdev = rt->dst.dev; @@ -946,15 +938,11 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) /* Guess output device to choose reasonable mtu and needed_headroom */ if (iph->daddr) { - struct flowi fl = { - .oif = tunnel->parms.link, - .fl4_dst = iph->daddr, - .fl4_src = iph->saddr, - .fl4_tos = RT_TOS(iph->tos), - .proto = IPPROTO_GRE, - .fl_gre_key = tunnel->parms.o_key - }; - struct rtable *rt = ip_route_output_key(dev_net(dev), &fl); + struct rtable *rt = ip_route_output_gre(dev_net(dev), + iph->daddr, iph->saddr, + tunnel->parms.o_key, + RT_TOS(iph->tos), + tunnel->parms.link); if (!IS_ERR(rt)) { tdev = rt->dst.dev; @@ -1208,15 +1196,12 @@ static int ipgre_open(struct net_device *dev) struct ip_tunnel *t = netdev_priv(dev); if 
(ipv4_is_multicast(t->parms.iph.daddr)) { - struct flowi fl = { - .oif = t->parms.link, - .fl4_dst = t->parms.iph.daddr, - .fl4_src = t->parms.iph.saddr, - .fl4_tos = RT_TOS(t->parms.iph.tos), - .proto = IPPROTO_GRE, - .fl_gre_key = t->parms.o_key - }; - struct rtable *rt = ip_route_output_key(dev_net(dev), &fl); + struct rtable *rt = ip_route_output_gre(dev_net(dev), + t->parms.iph.daddr, + t->parms.iph.saddr, + t->parms.o_key, + RT_TOS(t->parms.iph.tos), + t->parms.link); if (IS_ERR(rt)) return -EADDRNOTAVAIL; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 171f483..916152d 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -339,26 +339,19 @@ int ip_queue_xmit(struct sk_buff *skb) if(opt && opt->srr) daddr = opt->faddr; - { - struct flowi fl = { .oif = sk->sk_bound_dev_if, - .mark = sk->sk_mark, - .fl4_dst = daddr, - .fl4_src = inet->inet_saddr, - .fl4_tos = RT_CONN_FLAGS(sk), - .proto = sk->sk_protocol, - .flags = inet_sk_flowi_flags(sk), - .fl_ip_sport = inet->inet_sport, - .fl_ip_dport = inet->inet_dport }; - - /* If this fails, retransmit mechanism of transport layer will - * keep trying until route appears or the connection times - * itself out. - */ - security_sk_classify_flow(sk, &fl); - rt = ip_route_output_flow(sock_net(sk), &fl, sk); - if (IS_ERR(rt)) - goto no_route; - } + /* If this fails, retransmit mechanism of transport layer will + * keep trying until route appears or the connection times + * itself out. 
+ */ + rt = ip_route_output_ports(sock_net(sk), sk, + daddr, inet->inet_saddr, + inet->inet_dport, + inet->inet_sport, + sk->sk_protocol, + RT_CONN_FLAGS(sk), + sk->sk_bound_dev_if); + if (IS_ERR(rt)) + goto no_route; sk_setup_caps(sk, &rt->dst); } skb_dst_set_noref(skb, &rt->dst); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 65008f4..bfc17c5 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -460,20 +460,14 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_error_icmp; } - { - struct flowi fl = { - .oif = tunnel->parms.link, - .fl4_dst = dst, - .fl4_src= tiph->saddr, - .fl4_tos = RT_TOS(tos), - .proto = IPPROTO_IPIP - }; - - rt = ip_route_output_key(dev_net(dev), &fl); - if (IS_ERR(rt)) { - dev->stats.tx_carrier_errors++; - goto tx_error_icmp; - } + rt = ip_route_output_ports(dev_net(dev), NULL, + dst, tiph->saddr, + 0, 0, + IPPROTO_IPIP, RT_TOS(tos), + tunnel->parms.link); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error_icmp; } tdev = rt->dst.dev; @@ -584,14 +578,12 @@ static void ipip_tunnel_bind_dev(struct net_device *dev) iph = &tunnel->parms.iph; if (iph->daddr) { - struct flowi fl = { - .oif = tunnel->parms.link, - .fl4_dst = iph->daddr, - .fl4_src = iph->saddr, - .fl4_tos = RT_TOS(iph->tos), - .proto = IPPROTO_IPIP - }; - struct rtable *rt = ip_route_output_key(dev_net(dev), &fl); + struct rtable *rt = ip_route_output_ports(dev_net(dev), NULL, + iph->daddr, iph->saddr, + 0, 0, + IPPROTO_IPIP, + RT_TOS(iph->tos), + tunnel->parms.link); if (!IS_ERR(rt)) { tdev = rt->dst.dev; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 74909ba..594a300 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1611,25 +1611,19 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, #endif if (vif->flags & VIFF_TUNNEL) { - struct flowi fl = { - .oif = vif->link, - .fl4_dst = vif->remote, - .fl4_src = vif->local, - .fl4_tos = RT_TOS(iph->tos), - .proto = IPPROTO_IPIP - }; - rt = 
ip_route_output_key(net, &fl); + rt = ip_route_output_ports(net, NULL, + vif->remote, vif->local, + 0, 0, + IPPROTO_IPIP, + RT_TOS(iph->tos), vif->link); if (IS_ERR(rt)) goto out_free; encap = sizeof(struct iphdr); } else { - struct flowi fl = { - .oif = vif->link, - .fl4_dst = iph->daddr, - .fl4_tos = RT_TOS(iph->tos), - .proto = IPPROTO_IPIP - }; - rt = ip_route_output_key(net, &fl); + rt = ip_route_output_ports(net, NULL, iph->daddr, 0, + 0, 0, + IPPROTO_IPIP, + RT_TOS(iph->tos), vif->link); if (IS_ERR(rt)) goto out_free; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index ea8d5e8..f199b84 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -536,7 +536,6 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int err; struct sk_buff *skb2; struct iphdr *eiph; - struct flowi fl; struct rtable *rt; err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code, @@ -578,11 +577,10 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, eiph = ip_hdr(skb2); /* Try to guess incoming interface */ - memset(&fl, 0, sizeof(fl)); - fl.fl4_dst = eiph->saddr; - fl.fl4_tos = RT_TOS(eiph->tos); - fl.proto = IPPROTO_IPIP; - rt = ip_route_output_key(dev_net(skb->dev), &fl); + rt = ip_route_output_ports(dev_net(skb->dev), NULL, + eiph->saddr, 0, + 0, 0, + IPPROTO_IPIP, RT_TOS(eiph->tos), 0); if (IS_ERR(rt)) goto out; @@ -592,10 +590,11 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (rt->rt_flags & RTCF_LOCAL) { ip_rt_put(rt); rt = NULL; - fl.fl4_dst = eiph->daddr; - fl.fl4_src = eiph->saddr; - fl.fl4_tos = eiph->tos; - rt = ip_route_output_key(dev_net(skb->dev), &fl); + rt = ip_route_output_ports(dev_net(skb->dev), NULL, + eiph->daddr, eiph->saddr, + 0, 0, + IPPROTO_IPIP, + RT_TOS(eiph->tos), 0); if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) { if (!IS_ERR(rt)) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 3534cea..43b3337 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -732,17 +732,14 @@ static 
netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, dst = addr6->s6_addr32[3]; } - { - struct flowi fl = { .fl4_dst = dst, - .fl4_src = tiph->saddr, - .fl4_tos = RT_TOS(tos), - .oif = tunnel->parms.link, - .proto = IPPROTO_IPV6 }; - rt = ip_route_output_key(dev_net(dev), &fl); - if (IS_ERR(rt)) { - dev->stats.tx_carrier_errors++; - goto tx_error_icmp; - } + rt = ip_route_output_ports(dev_net(dev), NULL, + dst, tiph->saddr, + 0, 0, + IPPROTO_IPV6, RT_TOS(tos), + tunnel->parms.link); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error_icmp; } if (rt->rt_type != RTN_UNICAST) { ip_rt_put(rt); @@ -858,12 +855,12 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) iph = &tunnel->parms.iph; if (iph->daddr) { - struct flowi fl = { .fl4_dst = iph->daddr, - .fl4_src = iph->saddr, - .fl4_tos = RT_TOS(iph->tos), - .oif = tunnel->parms.link, - .proto = IPPROTO_IPV6 }; - struct rtable *rt = ip_route_output_key(dev_net(dev), &fl); + struct rtable *rt = ip_route_output_ports(dev_net(dev), NULL, + iph->daddr, iph->saddr, + 0, 0, + IPPROTO_IPV6, + RT_TOS(iph->tos), + tunnel->parms.link); if (!IS_ERR(rt)) { tdev = rt->dst.dev; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 2a698ff..fce9bd3 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -475,25 +475,17 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m if (opt && opt->srr) daddr = opt->faddr; - { - struct flowi fl = { .oif = sk->sk_bound_dev_if, - .fl4_dst = daddr, - .fl4_src = inet->inet_saddr, - .fl4_tos = RT_CONN_FLAGS(sk), - .proto = sk->sk_protocol, - .flags = inet_sk_flowi_flags(sk), - .fl_ip_sport = inet->inet_sport, - .fl_ip_dport = inet->inet_dport }; - - /* If this fails, retransmit mechanism of transport layer will - * keep trying until route appears or the connection times - * itself out. 
- */ - security_sk_classify_flow(sk, &fl); - rt = ip_route_output_flow(sock_net(sk), &fl, sk); - if (IS_ERR(rt)) - goto no_route; - } + /* If this fails, retransmit mechanism of transport layer will + * keep trying until route appears or the connection times + * itself out. + */ + rt = ip_route_output_ports(sock_net(sk), sk, + daddr, inet->inet_saddr, + inet->inet_dport, inet->inet_sport, + sk->sk_protocol, RT_CONN_FLAGS(sk), + sk->sk_bound_dev_if); + if (IS_ERR(rt)) + goto no_route; sk_setup_caps(sk, &rt->dst); } skb_dst_set(skb, dst_clone(&rt->dst)); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 878f6dd..faf381d 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -98,12 +98,7 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, spin_lock(&dest->dst_lock); if (!(rt = (struct rtable *) __ip_vs_dst_check(dest, rtos))) { - struct flowi fl = { - .fl4_dst = dest->addr.ip, - .fl4_tos = rtos, - }; - - rt = ip_route_output_key(net, &fl); + rt = ip_route_output(net, dest->addr.ip, 0, rtos, 0); if (IS_ERR(rt)) { spin_unlock(&dest->dst_lock); IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", @@ -117,12 +112,7 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, } spin_unlock(&dest->dst_lock); } else { - struct flowi fl = { - .fl4_dst = daddr, - .fl4_tos = rtos, - }; - - rt = ip_route_output_key(net, &fl); + rt = ip_route_output(net, daddr, 0, rtos, 0); if (IS_ERR(rt)) { IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c index 3620c56..55b93dc 100644 --- a/net/rxrpc/ar-peer.c +++ b/net/rxrpc/ar-peer.c @@ -36,28 +36,13 @@ static void rxrpc_destroy_peer(struct work_struct *work); static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) { struct rtable *rt; - struct flowi fl; peer->if_mtu = 1500; - memset(&fl, 0, sizeof(fl)); - - switch (peer->srx.transport.family) { - case AF_INET: - fl.oif = 0; - fl.proto = 
IPPROTO_UDP, - fl.fl4_dst = peer->srx.transport.sin.sin_addr.s_addr; - fl.fl4_src = 0; - fl.fl4_tos = 0; - /* assume AFS.CM talking to AFS.FS */ - fl.fl_ip_sport = htons(7001); - fl.fl_ip_dport = htons(7000); - break; - default: - BUG(); - } - - rt = ip_route_output_key(&init_net, &fl); + rt = ip_route_output_ports(&init_net, NULL, + peer->srx.transport.sin.sin_addr.s_addr, 0, + htons(7000), htons(7001), + IPPROTO_UDP, 0, 0); if (IS_ERR(rt)) { _leave(" [route err %ld]", PTR_ERR(rt)); return; -- cgit v1.1 From ca116922afa8cc5ad46b00c0a637b1cde5ca478a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 11 Mar 2011 15:59:31 -0500 Subject: xfrm: Eliminate "fl" and "pol" args to xfrm_bundle_ok(). There is only one caller of xfrm_bundle_ok(), and that always passes these parameters as NULL. Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b1932a6..9e4aacd 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -50,8 +50,7 @@ static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); static void xfrm_init_pmtu(struct dst_entry *dst); static int stale_bundle(struct dst_entry *dst); -static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst, - const struct flowi *fl, int family); +static int xfrm_bundle_ok(struct xfrm_dst *xdst, int family); static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, @@ -2223,7 +2222,7 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) static int stale_bundle(struct dst_entry *dst) { - return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC); + return !xfrm_bundle_ok((struct xfrm_dst *)dst, AF_UNSPEC); } void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) @@ -2295,8 +2294,7 @@ 
static void xfrm_init_pmtu(struct dst_entry *dst) * still valid. */ -static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, - const struct flowi *fl, int family) +static int xfrm_bundle_ok(struct xfrm_dst *first, int family) { struct dst_entry *dst = &first->u.dst; struct xfrm_dst *last; @@ -2305,26 +2303,12 @@ static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) || (dst->dev && !netif_running(dst->dev))) return 0; -#ifdef CONFIG_XFRM_SUB_POLICY - if (fl) { - if (first->origin && !flow_cache_uli_match(first->origin, fl)) - return 0; - if (first->partner && - !xfrm_selector_match(first->partner, fl, family)) - return 0; - } -#endif last = NULL; do { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; - if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family)) - return 0; - if (fl && pol && - !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl)) - return 0; if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; if (xdst->xfrm_genid != dst->xfrm->genid) -- cgit v1.1 From 1d28f42c1bd4bb2363d88df74d0128b4da135b4a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 12 Mar 2011 00:29:39 -0500 Subject: net: Put flowi_* prefix on AF independent members of struct flowi I intend to turn struct flowi into a union of AF specific flowi structs. There will be a common structure that each variant includes first, much like struct sock_common. This is the first step to move in that direction. Signed-off-by: David S. 
Miller --- net/core/fib_rules.c | 6 +-- net/dccp/ipv4.c | 17 +++---- net/dccp/ipv6.c | 20 ++++----- net/decnet/af_decnet.c | 4 +- net/decnet/dn_fib.c | 4 +- net/decnet/dn_nsp_out.c | 4 +- net/decnet/dn_route.c | 96 +++++++++++++++++++++------------------- net/ipv4/fib_frontend.c | 12 ++--- net/ipv4/fib_semantics.c | 2 +- net/ipv4/fib_trie.c | 2 +- net/ipv4/icmp.c | 12 ++--- net/ipv4/inet_connection_sock.c | 22 ++++----- net/ipv4/ip_output.c | 18 ++++---- net/ipv4/ipmr.c | 12 ++--- net/ipv4/netfilter.c | 6 +-- net/ipv4/raw.c | 10 ++--- net/ipv4/route.c | 72 +++++++++++++++--------------- net/ipv4/syncookies.c | 20 +++++---- net/ipv4/udp.c | 21 ++++----- net/ipv4/xfrm4_policy.c | 10 ++--- net/ipv4/xfrm4_state.c | 4 +- net/ipv6/af_inet6.c | 6 +-- net/ipv6/datagram.c | 20 ++++----- net/ipv6/icmp.c | 24 +++++----- net/ipv6/inet6_connection_sock.c | 12 ++--- net/ipv6/ip6_flowlabel.c | 2 +- net/ipv6/ip6_output.c | 10 ++--- net/ipv6/ip6_tunnel.c | 8 ++-- net/ipv6/ip6mr.c | 22 ++++----- net/ipv6/ipv6_sockglue.c | 4 +- net/ipv6/mip6.c | 6 +-- net/ipv6/netfilter.c | 4 +- net/ipv6/netfilter/ip6t_REJECT.c | 2 +- net/ipv6/raw.c | 20 ++++----- net/ipv6/route.c | 20 ++++----- net/ipv6/syncookies.c | 6 +-- net/ipv6/tcp_ipv6.c | 22 ++++----- net/ipv6/udp.c | 20 ++++----- net/ipv6/xfrm6_policy.c | 10 ++--- net/ipv6/xfrm6_state.c | 4 +- net/netfilter/ipvs/ip_vs_ctl.c | 2 +- net/netfilter/ipvs/ip_vs_xmit.c | 2 +- net/netfilter/xt_TEE.c | 4 +- net/sctp/ipv6.c | 8 ++-- net/sctp/protocol.c | 4 +- net/xfrm/xfrm_policy.c | 18 ++++---- net/xfrm/xfrm_state.c | 2 +- 47 files changed, 325 insertions(+), 311 deletions(-) (limited to 'net') diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index a20e5d3..8248ebb 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -181,13 +181,13 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, { int ret = 0; - if (rule->iifindex && (rule->iifindex != fl->iif)) + if (rule->iifindex && (rule->iifindex != 
fl->flowi_iif)) goto out; - if (rule->oifindex && (rule->oifindex != fl->oif)) + if (rule->oifindex && (rule->oifindex != fl->flowi_oif)) goto out; - if ((rule->mark ^ fl->mark) & rule->mark_mask) + if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask) goto out; ret = ops->match(rule, fl, flags); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 7882377..09a0991 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -465,14 +465,15 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, struct sk_buff *skb) { struct rtable *rt; - struct flowi fl = { .oif = skb_rtable(skb)->rt_iif, - .fl4_dst = ip_hdr(skb)->saddr, - .fl4_src = ip_hdr(skb)->daddr, - .fl4_tos = RT_CONN_FLAGS(sk), - .proto = sk->sk_protocol, - .fl_ip_sport = dccp_hdr(skb)->dccph_dport, - .fl_ip_dport = dccp_hdr(skb)->dccph_sport - }; + struct flowi fl = { + .flowi_oif = skb_rtable(skb)->rt_iif, + .fl4_dst = ip_hdr(skb)->saddr, + .fl4_src = ip_hdr(skb)->daddr, + .fl4_tos = RT_CONN_FLAGS(sk), + .flowi_proto = sk->sk_protocol, + .fl_ip_sport = dccp_hdr(skb)->dccph_dport, + .fl_ip_dport = dccp_hdr(skb)->dccph_sport, + }; security_skb_classify_flow(skb, &fl); rt = ip_route_output_flow(net, &fl, sk); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 5efc57f..5209ee7 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -154,10 +154,10 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, for now. 
*/ memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_DCCP; + fl.flowi_proto = IPPROTO_DCCP; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.oif = sk->sk_bound_dev_if; + fl.flowi_oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->inet_dport; fl.fl_ip_sport = inet->inet_sport; security_sk_classify_flow(sk, &fl); @@ -248,11 +248,11 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, struct dst_entry *dst; memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_DCCP; + fl.flowi_proto = IPPROTO_DCCP; ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); fl.fl6_flowlabel = 0; - fl.oif = ireq6->iif; + fl.flowi_oif = ireq6->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_rsk(req)->loc_port; security_req_classify_flow(req, &fl); @@ -321,8 +321,8 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) ipv6_addr_copy(&fl.fl6_dst, &rxip6h->saddr); ipv6_addr_copy(&fl.fl6_src, &rxip6h->daddr); - fl.proto = IPPROTO_DCCP; - fl.oif = inet6_iif(rxskb); + fl.flowi_proto = IPPROTO_DCCP; + fl.flowi_oif = inet6_iif(rxskb); fl.fl_ip_dport = dccp_hdr(skb)->dccph_dport; fl.fl_ip_sport = dccp_hdr(skb)->dccph_sport; security_skb_classify_flow(rxskb, &fl); @@ -530,11 +530,11 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, struct flowi fl; memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_DCCP; + fl.flowi_proto = IPPROTO_DCCP; ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); final_p = fl6_update_dst(&fl, opt, &final); ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); - fl.oif = sk->sk_bound_dev_if; + fl.flowi_oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_rsk(req)->loc_port; security_sk_classify_flow(sk, &fl); @@ -953,10 +953,10 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (!ipv6_addr_any(&np->rcv_saddr)) saddr = &np->rcv_saddr; - fl.proto = IPPROTO_DCCP; + fl.flowi_proto 
= IPPROTO_DCCP; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, saddr ? saddr : &np->saddr); - fl.oif = sk->sk_bound_dev_if; + fl.flowi_oif = sk->sk_bound_dev_if; fl.fl_ip_dport = usin->sin6_port; fl.fl_ip_sport = inet->inet_sport; security_sk_classify_flow(sk, &fl); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 2af15b1..aafd15a 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -948,11 +948,11 @@ static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen, err = -EHOSTUNREACH; memset(&fl, 0, sizeof(fl)); - fl.oif = sk->sk_bound_dev_if; + fl.flowi_oif = sk->sk_bound_dev_if; fl.fld_dst = dn_saddr2dn(&scp->peer); fl.fld_src = dn_saddr2dn(&scp->addr); dn_sk_ports_copy(&fl, scp); - fl.proto = DNPROTO_NSP; + fl.flowi_proto = DNPROTO_NSP; if (dn_route_output_sock(&sk->sk_dst_cache, &fl, sk, flags) < 0) goto out; sk->sk_route_caps = sk->sk_dst_cache->dev->features; diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 0ef0a81..4dfffa0 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -223,7 +223,7 @@ static int dn_fib_check_nh(const struct rtmsg *r, struct dn_fib_info *fi, struct memset(&fl, 0, sizeof(fl)); fl.fld_dst = nh->nh_gw; - fl.oif = nh->nh_oif; + fl.flowi_oif = nh->nh_oif; fl.fld_scope = r->rtm_scope + 1; if (fl.fld_scope < RT_SCOPE_LINK) @@ -424,7 +424,7 @@ int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowi * for_nexthops(fi) { if (nh->nh_flags & RTNH_F_DEAD) continue; - if (!fl->oif || fl->oif == nh->nh_oif) + if (!fl->flowi_oif || fl->flowi_oif == nh->nh_oif) break; } if (nhsel < fi->fib_nhs) { diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index 2ef1152..b3d6674 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -92,11 +92,11 @@ try_again: } memset(&fl, 0, sizeof(fl)); - fl.oif = sk->sk_bound_dev_if; + fl.flowi_oif = sk->sk_bound_dev_if; fl.fld_src = dn_saddr2dn(&scp->addr); fl.fld_dst = 
dn_saddr2dn(&scp->peer); dn_sk_ports_copy(&fl, scp); - fl.proto = DNPROTO_NSP; + fl.flowi_proto = DNPROTO_NSP; if (dn_route_output_sock(&sk->sk_dst_cache, &fl, sk, 0) == 0) { dst = sk_dst_get(sk); sk->sk_route_caps = dst->dev->features; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 484fdbf..d74d34b 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -286,10 +286,10 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) { return ((fl1->fld_dst ^ fl2->fld_dst) | (fl1->fld_src ^ fl2->fld_src) | - (fl1->mark ^ fl2->mark) | + (fl1->flowi_mark ^ fl2->flowi_mark) | (fl1->fld_scope ^ fl2->fld_scope) | - (fl1->oif ^ fl2->oif) | - (fl1->iif ^ fl2->iif)) == 0; + (fl1->flowi_oif ^ fl2->flowi_oif) | + (fl1->flowi_iif ^ fl2->flowi_iif)) == 0; } static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route **rp) @@ -905,12 +905,14 @@ static inline __le16 dn_fib_rules_map_destination(__le16 daddr, struct dn_fib_re static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *oldflp, int try_hard) { - struct flowi fl = { .fld_dst = oldflp->fld_dst, - .fld_src = oldflp->fld_src, - .fld_scope = RT_SCOPE_UNIVERSE, - .mark = oldflp->mark, - .iif = init_net.loopback_dev->ifindex, - .oif = oldflp->oif }; + struct flowi fl = { + .fld_dst = oldflp->fld_dst, + .fld_src = oldflp->fld_src, + .fld_scope = RT_SCOPE_UNIVERSE, + .flowi_mark = oldflp->flowi_mark, + .flowi_iif = init_net.loopback_dev->ifindex, + .flowi_oif = oldflp->flowi_oif, + }; struct dn_route *rt = NULL; struct net_device *dev_out = NULL, *dev; struct neighbour *neigh = NULL; @@ -926,11 +928,11 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old "dn_route_output_slow: dst=%04x src=%04x mark=%d" " iif=%d oif=%d\n", le16_to_cpu(oldflp->fld_dst), le16_to_cpu(oldflp->fld_src), - oldflp->mark, init_net.loopback_dev->ifindex, oldflp->oif); + oldflp->flowi_mark, init_net.loopback_dev->ifindex, oldflp->flowi_oif); /* If we 
have an output interface, verify its a DECnet device */ - if (oldflp->oif) { - dev_out = dev_get_by_index(&init_net, oldflp->oif); + if (oldflp->flowi_oif) { + dev_out = dev_get_by_index(&init_net, oldflp->flowi_oif); err = -ENODEV; if (dev_out && dev_out->dn_ptr == NULL) { dev_put(dev_out); @@ -988,7 +990,7 @@ source_ok: if (!fl.fld_dst) goto out; } - fl.oif = init_net.loopback_dev->ifindex; + fl.flowi_oif = init_net.loopback_dev->ifindex; res.type = RTN_LOCAL; goto make_route; } @@ -998,7 +1000,7 @@ source_ok: "dn_route_output_slow: initial checks complete." " dst=%o4x src=%04x oif=%d try_hard=%d\n", le16_to_cpu(fl.fld_dst), le16_to_cpu(fl.fld_src), - fl.oif, try_hard); + fl.flowi_oif, try_hard); /* * N.B. If the kernel is compiled without router support then @@ -1023,8 +1025,8 @@ source_ok: if (!try_hard) { neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fl.fld_dst); if (neigh) { - if ((oldflp->oif && - (neigh->dev->ifindex != oldflp->oif)) || + if ((oldflp->flowi_oif && + (neigh->dev->ifindex != oldflp->flowi_oif)) || (oldflp->fld_src && (!dn_dev_islocal(neigh->dev, oldflp->fld_src)))) { @@ -1078,7 +1080,7 @@ select_source: if (fl.fld_src == 0 && res.type != RTN_LOCAL) goto e_addr; } - fl.oif = dev_out->ifindex; + fl.flowi_oif = dev_out->ifindex; goto make_route; } free_res = 1; @@ -1093,14 +1095,14 @@ select_source: dev_put(dev_out); dev_out = init_net.loopback_dev; dev_hold(dev_out); - fl.oif = dev_out->ifindex; + fl.flowi_oif = dev_out->ifindex; if (res.fi) dn_fib_info_put(res.fi); res.fi = NULL; goto make_route; } - if (res.fi->fib_nhs > 1 && fl.oif == 0) + if (res.fi->fib_nhs > 1 && fl.flowi_oif == 0) dn_fib_select_multipath(&fl, &res); /* @@ -1115,7 +1117,7 @@ select_source: dev_put(dev_out); dev_out = DN_FIB_RES_DEV(res); dev_hold(dev_out); - fl.oif = dev_out->ifindex; + fl.flowi_oif = dev_out->ifindex; gateway = DN_FIB_RES_GW(res); make_route: @@ -1131,9 +1133,9 @@ make_route: rt->fl.fld_src = oldflp->fld_src; rt->fl.fld_dst = oldflp->fld_dst; - 
rt->fl.oif = oldflp->oif; - rt->fl.iif = 0; - rt->fl.mark = oldflp->mark; + rt->fl.flowi_oif = oldflp->flowi_oif; + rt->fl.flowi_iif = 0; + rt->fl.flowi_mark = oldflp->flowi_mark; rt->rt_saddr = fl.fld_src; rt->rt_daddr = fl.fld_dst; @@ -1201,9 +1203,9 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl rt = rcu_dereference_bh(rt->dst.dn_next)) { if ((flp->fld_dst == rt->fl.fld_dst) && (flp->fld_src == rt->fl.fld_src) && - (flp->mark == rt->fl.mark) && + (flp->flowi_mark == rt->fl.flowi_mark) && dn_is_output_route(rt) && - (rt->fl.oif == flp->oif)) { + (rt->fl.flowi_oif == flp->flowi_oif)) { dst_use(&rt->dst, jiffies); rcu_read_unlock_bh(); *pprt = &rt->dst; @@ -1221,7 +1223,7 @@ static int dn_route_output_key(struct dst_entry **pprt, struct flowi *flp, int f int err; err = __dn_route_output_key(pprt, flp, flags); - if (err == 0 && flp->proto) { + if (err == 0 && flp->flowi_proto) { *pprt = xfrm_lookup(&init_net, *pprt, flp, NULL, 0); if (IS_ERR(*pprt)) { err = PTR_ERR(*pprt); @@ -1236,9 +1238,9 @@ int dn_route_output_sock(struct dst_entry **pprt, struct flowi *fl, struct sock int err; err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD); - if (err == 0 && fl->proto) { + if (err == 0 && fl->flowi_proto) { if (!(flags & MSG_DONTWAIT)) - fl->flags |= FLOWI_FLAG_CAN_SLEEP; + fl->flowi_flags |= FLOWI_FLAG_CAN_SLEEP; *pprt = xfrm_lookup(&init_net, *pprt, fl, sk, 0); if (IS_ERR(*pprt)) { err = PTR_ERR(*pprt); @@ -1260,11 +1262,13 @@ static int dn_route_input_slow(struct sk_buff *skb) int flags = 0; __le16 gateway = 0; __le16 local_src = 0; - struct flowi fl = { .fld_dst = cb->dst, - .fld_src = cb->src, - .fld_scope = RT_SCOPE_UNIVERSE, - .mark = skb->mark, - .iif = skb->dev->ifindex }; + struct flowi fl = { + .fld_dst = cb->dst, + .fld_src = cb->src, + .fld_scope = RT_SCOPE_UNIVERSE, + .flowi_mark = skb->mark, + .flowi_iif = skb->dev->ifindex, + }; struct dn_fib_res res = { .fi = NULL, .type = RTN_UNREACHABLE }; int err = -EINVAL; 
int free_res = 0; @@ -1343,7 +1347,7 @@ static int dn_route_input_slow(struct sk_buff *skb) if (dn_db->parms.forwarding == 0) goto e_inval; - if (res.fi->fib_nhs > 1 && fl.oif == 0) + if (res.fi->fib_nhs > 1 && fl.flowi_oif == 0) dn_fib_select_multipath(&fl, &res); /* @@ -1408,9 +1412,9 @@ make_route: rt->fl.fld_src = cb->src; rt->fl.fld_dst = cb->dst; - rt->fl.oif = 0; - rt->fl.iif = in_dev->ifindex; - rt->fl.mark = fl.mark; + rt->fl.flowi_oif = 0; + rt->fl.flowi_iif = in_dev->ifindex; + rt->fl.flowi_mark = fl.flowi_mark; rt->dst.flags = DST_HOST; rt->dst.neighbour = neigh; @@ -1482,9 +1486,9 @@ static int dn_route_input(struct sk_buff *skb) rt = rcu_dereference(rt->dst.dn_next)) { if ((rt->fl.fld_src == cb->src) && (rt->fl.fld_dst == cb->dst) && - (rt->fl.oif == 0) && - (rt->fl.mark == skb->mark) && - (rt->fl.iif == cb->iif)) { + (rt->fl.flowi_oif == 0) && + (rt->fl.flowi_mark == skb->mark) && + (rt->fl.flowi_iif == cb->iif)) { dst_use(&rt->dst, jiffies); rcu_read_unlock(); skb_dst_set(skb, (struct dst_entry *)rt); @@ -1541,7 +1545,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, rt->dst.error) < 0) goto rtattr_failure; if (dn_is_input_route(rt)) - RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif); + RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.flowi_iif); nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; @@ -1570,7 +1574,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void return -EINVAL; memset(&fl, 0, sizeof(fl)); - fl.proto = DNPROTO_NSP; + fl.flowi_proto = DNPROTO_NSP; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) @@ -1583,11 +1587,11 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void if (rta[RTA_DST-1]) memcpy(&fl.fld_dst, RTA_DATA(rta[RTA_DST-1]), 2); if (rta[RTA_IIF-1]) - memcpy(&fl.iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); + memcpy(&fl.flowi_iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); - if (fl.iif) { + if (fl.flowi_iif) { struct net_device 
*dev; - if ((dev = dev_get_by_index(&init_net, fl.iif)) == NULL) { + if ((dev = dev_get_by_index(&init_net, fl.flowi_iif)) == NULL) { kfree_skb(skb); return -ENODEV; } @@ -1611,7 +1615,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void int oif = 0; if (rta[RTA_OIF - 1]) memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int)); - fl.oif = oif; + fl.flowi_oif = oif; err = dn_route_output_key((struct dst_entry **)&rt, &fl, 0); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index fe10bcd..7610528 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -200,9 +200,9 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, int ret; struct net *net; - fl.oif = 0; - fl.iif = oif; - fl.mark = mark; + fl.flowi_oif = 0; + fl.flowi_iif = oif; + fl.flowi_mark = mark; fl.fl4_dst = src; fl.fl4_src = dst; fl.fl4_tos = tos; @@ -215,7 +215,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, rpf = IN_DEV_RPFILTER(in_dev); accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); if (mark && !IN_DEV_SRC_VMARK(in_dev)) - fl.mark = 0; + fl.flowi_mark = 0; } if (in_dev == NULL) @@ -253,7 +253,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, goto last_resort; if (rpf == 1) goto e_rpf; - fl.oif = dev->ifindex; + fl.flowi_oif = dev->ifindex; ret = 0; if (fib_lookup(net, &fl, &res) == 0) { @@ -797,7 +797,7 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) struct fib_result res; struct flowi fl = { - .mark = frn->fl_mark, + .flowi_mark = frn->fl_mark, .fl4_dst = frn->fl_addr, .fl4_tos = frn->fl_tos, .fl4_scope = frn->fl_scope, diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index b5d523b..79179ad 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -563,7 +563,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, struct flowi fl = { .fl4_dst = nh->nh_gw, .fl4_scope = cfg->fc_scope + 1, - .oif = nh->nh_oif, + 
.flowi_oif = nh->nh_oif, }; /* It is not necessary, but requires a bit of thinking */ diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index a4109a5..d5ff80e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1379,7 +1379,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, if (nh->nh_flags & RTNH_F_DEAD) continue; - if (flp->oif && flp->oif != nh->nh_oif) + if (flp->flowi_oif && flp->flowi_oif != nh->nh_oif) continue; #ifdef CONFIG_IP_FIB_TRIE_STATS diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 1771ce6..3fde7f2 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -353,10 +353,12 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) daddr = icmp_param->replyopts.faddr; } { - struct flowi fl = { .fl4_dst= daddr, - .fl4_src = rt->rt_spec_dst, - .fl4_tos = RT_TOS(ip_hdr(skb)->tos), - .proto = IPPROTO_ICMP }; + struct flowi fl = { + .fl4_dst = daddr, + .fl4_src = rt->rt_spec_dst, + .fl4_tos = RT_TOS(ip_hdr(skb)->tos), + .flowi_proto = IPPROTO_ICMP, + }; security_skb_classify_flow(skb, &fl); rt = ip_route_output_key(net, &fl); if (IS_ERR(rt)) @@ -381,7 +383,7 @@ static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in, param->replyopts.faddr : iph->saddr), .fl4_src = saddr, .fl4_tos = RT_TOS(tos), - .proto = IPPROTO_ICMP, + .flowi_proto = IPPROTO_ICMP, .fl_icmp_type = type, .fl_icmp_code = code, }; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index e4e301a..9708170 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -356,16 +356,18 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct rtable *rt; const struct inet_request_sock *ireq = inet_rsk(req); struct ip_options *opt = inet_rsk(req)->opt; - struct flowi fl = { .oif = sk->sk_bound_dev_if, - .mark = sk->sk_mark, - .fl4_dst = ((opt && opt->srr) ? 
- opt->faddr : ireq->rmt_addr), - .fl4_src = ireq->loc_addr, - .fl4_tos = RT_CONN_FLAGS(sk), - .proto = sk->sk_protocol, - .flags = inet_sk_flowi_flags(sk), - .fl_ip_sport = inet_sk(sk)->inet_sport, - .fl_ip_dport = ireq->rmt_port }; + struct flowi fl = { + .flowi_oif = sk->sk_bound_dev_if, + .flowi_mark = sk->sk_mark, + .fl4_dst = ((opt && opt->srr) ? + opt->faddr : ireq->rmt_addr), + .fl4_src = ireq->loc_addr, + .fl4_tos = RT_CONN_FLAGS(sk), + .flowi_proto = sk->sk_protocol, + .flowi_flags = inet_sk_flowi_flags(sk), + .fl_ip_sport = inet_sk(sk)->inet_sport, + .fl_ip_dport = ireq->rmt_port, + }; struct net *net = sock_net(sk); security_req_classify_flow(req, &fl); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 916152d..e35ca40 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1474,14 +1474,16 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar } { - struct flowi fl = { .oif = arg->bound_dev_if, - .fl4_dst = daddr, - .fl4_src = rt->rt_spec_dst, - .fl4_tos = RT_TOS(ip_hdr(skb)->tos), - .fl_ip_sport = tcp_hdr(skb)->dest, - .fl_ip_dport = tcp_hdr(skb)->source, - .proto = sk->sk_protocol, - .flags = ip_reply_arg_flowi_flags(arg) }; + struct flowi fl = { + .flowi_oif = arg->bound_dev_if, + .fl4_dst = daddr, + .fl4_src = rt->rt_spec_dst, + .fl4_tos = RT_TOS(ip_hdr(skb)->tos), + .fl_ip_sport = tcp_hdr(skb)->dest, + .fl_ip_dport = tcp_hdr(skb)->source, + .flowi_proto = sk->sk_protocol, + .flowi_flags = ip_reply_arg_flowi_flags(arg), + }; security_skb_classify_flow(skb, &fl); rt = ip_route_output_key(sock_net(sk), &fl); if (IS_ERR(rt)) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 594a300..3b72b0a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -436,9 +436,9 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) struct net *net = dev_net(dev); struct mr_table *mrt; struct flowi fl = { - .oif = dev->ifindex, - .iif = skb->skb_iif, - .mark = skb->mark, + .flowi_oif = 
dev->ifindex, + .flowi_iif = skb->skb_iif, + .flowi_mark = skb->mark, }; int err; @@ -1793,9 +1793,9 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct rtable *rt) .fl4_dst = rt->rt_key_dst, .fl4_src = rt->rt_key_src, .fl4_tos = rt->rt_tos, - .oif = rt->rt_oif, - .iif = rt->rt_iif, - .mark = rt->rt_mark, + .flowi_oif = rt->rt_oif, + .flowi_iif = rt->rt_iif, + .flowi_mark = rt->rt_mark, }; struct mr_table *mrt; int err; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 67bf709..6f40ba5 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -35,9 +35,9 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) if (type == RTN_LOCAL) fl.fl4_src = iph->saddr; fl.fl4_tos = RT_TOS(iph->tos); - fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; - fl.mark = skb->mark; - fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; + fl.flowi_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; + fl.flowi_mark = skb->mark; + fl.flowi_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; rt = ip_route_output_key(net, &fl); if (IS_ERR(rt)) return -1; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 467d570..b42b7cd 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -418,7 +418,7 @@ static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) if (!iov) continue; - switch (fl->proto) { + switch (fl->flowi_proto) { case IPPROTO_ICMP: /* check if one-byte field is readable or not. */ if (iov->iov_base && iov->iov_len < 1) @@ -548,14 +548,14 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } { - struct flowi fl = { .oif = ipc.oif, - .mark = sk->sk_mark, + struct flowi fl = { .flowi_oif = ipc.oif, + .flowi_mark = sk->sk_mark, .fl4_dst = daddr, .fl4_src = saddr, .fl4_tos = tos, - .proto = inet->hdrincl ? IPPROTO_RAW : + .flowi_proto = inet->hdrincl ? 
IPPROTO_RAW : sk->sk_protocol, - .flags = FLOWI_FLAG_CAN_SLEEP, + .flowi_flags = FLOWI_FLAG_CAN_SLEEP, }; if (!inet->hdrincl) { err = raw_probe_proto_opt(&fl, msg); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 9c17e32..c9aa4f9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1701,9 +1701,9 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) .fl4_dst = rt->rt_key_dst, .fl4_src = rt->rt_key_src, .fl4_tos = rt->rt_tos, - .oif = rt->rt_oif, - .iif = rt->rt_iif, - .mark = rt->rt_mark, + .flowi_oif = rt->rt_oif, + .flowi_iif = rt->rt_iif, + .flowi_mark = rt->rt_mark, }; rcu_read_lock(); @@ -1766,7 +1766,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi *oldflp, /* If a peer entry exists for this destination, we must hook * it up in order to get at cached metrics. */ - if (oldflp && (oldflp->flags & FLOWI_FLAG_PRECOW_METRICS)) + if (oldflp && (oldflp->flowi_flags & FLOWI_FLAG_PRECOW_METRICS)) create = 1; rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create); @@ -2057,9 +2057,9 @@ static int ip_mkroute_input(struct sk_buff *skb, return err; /* put it into the cache */ - hash = rt_hash(daddr, saddr, fl->iif, + hash = rt_hash(daddr, saddr, fl->flowi_iif, rt_genid(dev_net(rth->dst.dev))); - rth = rt_intern_hash(hash, rth, skb, fl->iif); + rth = rt_intern_hash(hash, rth, skb, fl->flowi_iif); if (IS_ERR(rth)) return PTR_ERR(rth); return 0; @@ -2118,9 +2118,9 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, /* * Now we are ready to route packet. 
*/ - fl.oif = 0; - fl.iif = dev->ifindex; - fl.mark = skb->mark; + fl.flowi_oif = 0; + fl.flowi_iif = dev->ifindex; + fl.flowi_mark = skb->mark; fl.fl4_dst = daddr; fl.fl4_src = saddr; fl.fl4_tos = tos; @@ -2205,8 +2205,8 @@ local_input: rth->rt_flags &= ~RTCF_LOCAL; } rth->rt_type = res.type; - hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); - rth = rt_intern_hash(hash, rth, skb, fl.iif); + hash = rt_hash(daddr, saddr, fl.flowi_iif, rt_genid(net)); + rth = rt_intern_hash(hash, rth, skb, fl.flowi_iif); err = 0; if (IS_ERR(rth)) err = PTR_ERR(rth); @@ -2369,7 +2369,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, } else if (type == RTN_MULTICAST) { flags |= RTCF_MULTICAST | RTCF_LOCAL; if (!ip_check_mc_rcu(in_dev, oldflp->fl4_dst, oldflp->fl4_src, - oldflp->proto)) + oldflp->flowi_proto)) flags &= ~RTCF_LOCAL; /* If multicast route do not exist use * default one, but do not gateway in this case. @@ -2387,8 +2387,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_key_dst = oldflp->fl4_dst; rth->rt_tos = tos; rth->rt_key_src = oldflp->fl4_src; - rth->rt_oif = oldflp->oif; - rth->rt_mark = oldflp->mark; + rth->rt_oif = oldflp->flowi_oif; + rth->rt_mark = oldflp->flowi_mark; rth->rt_dst = fl->fl4_dst; rth->rt_src = fl->fl4_src; rth->rt_iif = 0; @@ -2452,9 +2452,9 @@ static struct rtable *ip_route_output_slow(struct net *net, res.r = NULL; #endif - fl.oif = oldflp->oif; - fl.iif = net->loopback_dev->ifindex; - fl.mark = oldflp->mark; + fl.flowi_oif = oldflp->flowi_oif; + fl.flowi_iif = net->loopback_dev->ifindex; + fl.flowi_mark = oldflp->flowi_mark; fl.fl4_dst = oldflp->fl4_dst; fl.fl4_src = oldflp->fl4_src; fl.fl4_tos = tos & IPTOS_RT_MASK; @@ -2477,7 +2477,7 @@ static struct rtable *ip_route_output_slow(struct net *net, of another iface. 
--ANK */ - if (oldflp->oif == 0 && + if (oldflp->flowi_oif == 0 && (ipv4_is_multicast(oldflp->fl4_dst) || ipv4_is_lbcast(oldflp->fl4_dst))) { /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ @@ -2500,11 +2500,11 @@ static struct rtable *ip_route_output_slow(struct net *net, Luckily, this hack is good workaround. */ - fl.oif = dev_out->ifindex; + fl.flowi_oif = dev_out->ifindex; goto make_route; } - if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) { + if (!(oldflp->flowi_flags & FLOWI_FLAG_ANYSRC)) { /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ if (!__ip_dev_find(net, oldflp->fl4_src, false)) goto out; @@ -2512,8 +2512,8 @@ static struct rtable *ip_route_output_slow(struct net *net, } - if (oldflp->oif) { - dev_out = dev_get_by_index_rcu(net, oldflp->oif); + if (oldflp->flowi_oif) { + dev_out = dev_get_by_index_rcu(net, oldflp->flowi_oif); rth = ERR_PTR(-ENODEV); if (dev_out == NULL) goto out; @@ -2545,7 +2545,7 @@ static struct rtable *ip_route_output_slow(struct net *net, if (!fl.fl4_dst) fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); dev_out = net->loopback_dev; - fl.oif = net->loopback_dev->ifindex; + fl.flowi_oif = net->loopback_dev->ifindex; res.type = RTN_LOCAL; flags |= RTCF_LOCAL; goto make_route; @@ -2553,7 +2553,7 @@ static struct rtable *ip_route_output_slow(struct net *net, if (fib_lookup(net, &fl, &res)) { res.fi = NULL; - if (oldflp->oif) { + if (oldflp->flowi_oif) { /* Apparently, routing tables are wrong. Assume, that the destination is on link. 
@@ -2590,25 +2590,25 @@ static struct rtable *ip_route_output_slow(struct net *net, fl.fl4_src = fl.fl4_dst; } dev_out = net->loopback_dev; - fl.oif = dev_out->ifindex; + fl.flowi_oif = dev_out->ifindex; res.fi = NULL; flags |= RTCF_LOCAL; goto make_route; } #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (res.fi->fib_nhs > 1 && fl.oif == 0) + if (res.fi->fib_nhs > 1 && fl.flowi_oif == 0) fib_select_multipath(&res); else #endif - if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif) + if (!res.prefixlen && res.type == RTN_UNICAST && !fl.flowi_oif) fib_select_default(&res); if (!fl.fl4_src) fl.fl4_src = FIB_RES_PREFSRC(res); dev_out = FIB_RES_DEV(res); - fl.oif = dev_out->ifindex; + fl.flowi_oif = dev_out->ifindex; make_route: @@ -2616,9 +2616,9 @@ make_route: if (!IS_ERR(rth)) { unsigned int hash; - hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, + hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->flowi_oif, rt_genid(dev_net(dev_out))); - rth = rt_intern_hash(hash, rth, NULL, oldflp->oif); + rth = rt_intern_hash(hash, rth, NULL, oldflp->flowi_oif); } out: @@ -2634,7 +2634,7 @@ struct rtable *__ip_route_output_key(struct net *net, const struct flowi *flp) if (!rt_caching(net)) goto slow_output; - hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); + hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->flowi_oif, rt_genid(net)); rcu_read_lock_bh(); for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; @@ -2642,8 +2642,8 @@ struct rtable *__ip_route_output_key(struct net *net, const struct flowi *flp) if (rth->rt_key_dst == flp->fl4_dst && rth->rt_key_src == flp->fl4_src && rt_is_output_route(rth) && - rth->rt_oif == flp->oif && - rth->rt_mark == flp->mark && + rth->rt_oif == flp->flowi_oif && + rth->rt_mark == flp->flowi_mark && !((rth->rt_tos ^ flp->fl4_tos) & (IPTOS_RT_MASK | RTO_ONLINK)) && net_eq(dev_net(rth->dst.dev), net) && @@ -2741,7 +2741,7 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi *flp, if 
(IS_ERR(rt)) return rt; - if (flp->proto) { + if (flp->flowi_proto) { if (!flp->fl4_src) flp->fl4_src = rt->rt_src; if (!flp->fl4_dst) @@ -2917,8 +2917,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void .fl4_dst = dst, .fl4_src = src, .fl4_tos = rtm->rtm_tos, - .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, - .mark = mark, + .flowi_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, + .flowi_mark = mark, }; rt = ip_route_output_key(net, &fl); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 0ad6ddf..98d47dc 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -345,15 +345,17 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, * no easy way to do this. */ { - struct flowi fl = { .mark = sk->sk_mark, - .fl4_dst = ((opt && opt->srr) ? - opt->faddr : ireq->rmt_addr), - .fl4_src = ireq->loc_addr, - .fl4_tos = RT_CONN_FLAGS(sk), - .proto = IPPROTO_TCP, - .flags = inet_sk_flowi_flags(sk), - .fl_ip_sport = th->dest, - .fl_ip_dport = th->source }; + struct flowi fl = { + .flowi_mark = sk->sk_mark, + .fl4_dst = ((opt && opt->srr) ? 
+ opt->faddr : ireq->rmt_addr), + .fl4_src = ireq->loc_addr, + .fl4_tos = RT_CONN_FLAGS(sk), + .flowi_proto = IPPROTO_TCP, + .flowi_flags = inet_sk_flowi_flags(sk), + .fl_ip_sport = th->dest, + .fl_ip_dport = th->source, + }; security_req_classify_flow(req, &fl); rt = ip_route_output_key(sock_net(sk), &fl); if (IS_ERR(rt)) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c9a73e5..e10f62e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -908,16 +908,17 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, rt = (struct rtable *)sk_dst_check(sk, 0); if (rt == NULL) { - struct flowi fl = { .oif = ipc.oif, - .mark = sk->sk_mark, - .fl4_dst = faddr, - .fl4_src = saddr, - .fl4_tos = tos, - .proto = sk->sk_protocol, - .flags = (inet_sk_flowi_flags(sk) | - FLOWI_FLAG_CAN_SLEEP), - .fl_ip_sport = inet->inet_sport, - .fl_ip_dport = dport + struct flowi fl = { + .flowi_oif = ipc.oif, + .flowi_mark = sk->sk_mark, + .fl4_dst = faddr, + .fl4_src = saddr, + .fl4_tos = tos, + .flowi_proto = sk->sk_protocol, + .flowi_flags = (inet_sk_flowi_flags(sk) | + FLOWI_FLAG_CAN_SLEEP), + .fl_ip_sport = inet->inet_sport, + .fl_ip_dport = dport, }; struct net *net = sock_net(sk); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index c70c42e..4294f12 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -73,9 +73,9 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, rt->rt_key_dst = fl->fl4_dst; rt->rt_key_src = fl->fl4_src; rt->rt_tos = fl->fl4_tos; - rt->rt_iif = fl->iif; - rt->rt_oif = fl->oif; - rt->rt_mark = fl->mark; + rt->rt_iif = fl->flowi_iif; + rt->rt_oif = fl->flowi_oif; + rt->rt_mark = fl->flowi_mark; xdst->u.dst.dev = dev; dev_hold(dev); @@ -104,7 +104,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) u8 *xprth = skb_network_header(skb) + iph->ihl * 4; memset(fl, 0, sizeof(struct flowi)); - fl->mark = skb->mark; + fl->flowi_mark = skb->mark; if (!(iph->frag_off & htons(IP_MF | 
IP_OFFSET))) { switch (iph->protocol) { @@ -173,7 +173,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) break; } } - fl->proto = iph->protocol; + fl->flowi_proto = iph->protocol; fl->fl4_dst = reverse ? iph->saddr : iph->daddr; fl->fl4_src = reverse ? iph->daddr : iph->saddr; fl->fl4_tos = iph->tos; diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 983eff2..d231434 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -32,8 +32,8 @@ __xfrm4_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl) sel->family = AF_INET; sel->prefixlen_d = 32; sel->prefixlen_s = 32; - sel->proto = fl->proto; - sel->ifindex = fl->oif; + sel->proto = fl->flowi_proto; + sel->ifindex = fl->flowi_oif; } static void diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a88b2e9..35b0be0 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -655,12 +655,12 @@ int inet6_sk_rebuild_header(struct sock *sk) struct flowi fl; memset(&fl, 0, sizeof(fl)); - fl.proto = sk->sk_protocol; + fl.flowi_proto = sk->sk_protocol; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.fl6_flowlabel = np->flow_label; - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; + fl.flowi_oif = sk->sk_bound_dev_if; + fl.flowi_mark = sk->sk_mark; fl.fl_ip_dport = inet->inet_dport; fl.fl_ip_sport = inet->inet_sport; security_sk_classify_flow(sk, &fl); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index be3a781..6c24b26 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -146,16 +146,16 @@ ipv4_connected: * destination cache for it. 
*/ - fl.proto = sk->sk_protocol; + fl.flowi_proto = sk->sk_protocol; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; + fl.flowi_oif = sk->sk_bound_dev_if; + fl.flowi_mark = sk->sk_mark; fl.fl_ip_dport = inet->inet_dport; fl.fl_ip_sport = inet->inet_sport; - if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST)) - fl.oif = np->mcast_oif; + if (!fl.flowi_oif && (addr_type&IPV6_ADDR_MULTICAST)) + fl.flowi_oif = np->mcast_oif; security_sk_classify_flow(sk, &fl); @@ -299,7 +299,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu) mtu_info->ip6m_addr.sin6_family = AF_INET6; mtu_info->ip6m_addr.sin6_port = 0; mtu_info->ip6m_addr.sin6_flowinfo = 0; - mtu_info->ip6m_addr.sin6_scope_id = fl->oif; + mtu_info->ip6m_addr.sin6_scope_id = fl->flowi_oif; ipv6_addr_copy(&mtu_info->ip6m_addr.sin6_addr, &ipv6_hdr(skb)->daddr); __skb_pull(skb, skb_tail_pointer(skb) - skb->data); @@ -629,16 +629,16 @@ int datagram_send_ctl(struct net *net, src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg); if (src_info->ipi6_ifindex) { - if (fl->oif && src_info->ipi6_ifindex != fl->oif) + if (fl->flowi_oif && src_info->ipi6_ifindex != fl->flowi_oif) return -EINVAL; - fl->oif = src_info->ipi6_ifindex; + fl->flowi_oif = src_info->ipi6_ifindex; } addr_type = __ipv6_addr_type(&src_info->ipi6_addr); rcu_read_lock(); - if (fl->oif) { - dev = dev_get_by_index_rcu(net, fl->oif); + if (fl->flowi_oif) { + dev = dev_get_by_index_rcu(net, fl->flowi_oif); if (!dev) { rcu_read_unlock(); return -ENODEV; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 5566595..9e123e0 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -235,7 +235,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct sizeof(struct icmp6hdr), skb->csum); icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, - len, fl->proto, + len, fl->flowi_proto, skb->csum); } else { __wsum tmp_csum = 0; @@ -248,7 
+248,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct sizeof(struct icmp6hdr), tmp_csum); icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, - len, fl->proto, + len, fl->flowi_proto, tmp_csum); } ip6_push_pending_frames(sk); @@ -443,11 +443,11 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) mip6_addr_swap(skb); memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_ICMPV6; + fl.flowi_proto = IPPROTO_ICMPV6; ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr); if (saddr) ipv6_addr_copy(&fl.fl6_src, saddr); - fl.oif = iif; + fl.flowi_oif = iif; fl.fl_icmp_type = type; fl.fl_icmp_code = code; security_skb_classify_flow(skb, &fl); @@ -465,8 +465,8 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) tmp_hdr.icmp6_cksum = 0; tmp_hdr.icmp6_pointer = htonl(info); - if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) - fl.oif = np->mcast_oif; + if (!fl.flowi_oif && ipv6_addr_is_multicast(&fl.fl6_dst)) + fl.flowi_oif = np->mcast_oif; dst = icmpv6_route_lookup(net, skb, sk, &fl); if (IS_ERR(dst)) @@ -539,11 +539,11 @@ static void icmpv6_echo_reply(struct sk_buff *skb) tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY; memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_ICMPV6; + fl.flowi_proto = IPPROTO_ICMPV6; ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); if (saddr) ipv6_addr_copy(&fl.fl6_src, saddr); - fl.oif = skb->dev->ifindex; + fl.flowi_oif = skb->dev->ifindex; fl.fl_icmp_type = ICMPV6_ECHO_REPLY; security_skb_classify_flow(skb, &fl); @@ -552,8 +552,8 @@ static void icmpv6_echo_reply(struct sk_buff *skb) return; np = inet6_sk(sk); - if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) - fl.oif = np->mcast_oif; + if (!fl.flowi_oif && ipv6_addr_is_multicast(&fl.fl6_dst)) + fl.flowi_oif = np->mcast_oif; err = ip6_dst_lookup(sk, &dst, &fl); if (err) @@ -793,10 +793,10 @@ void icmpv6_flow_init(struct sock *sk, struct flowi *fl, memset(fl, 0, sizeof(*fl)); ipv6_addr_copy(&fl->fl6_src, saddr); 
ipv6_addr_copy(&fl->fl6_dst, daddr); - fl->proto = IPPROTO_ICMPV6; + fl->flowi_proto = IPPROTO_ICMPV6; fl->fl_icmp_type = type; fl->fl_icmp_code = 0; - fl->oif = oif; + fl->flowi_oif = oif; security_sk_classify_flow(sk, fl); } diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index d687e13..673f9bf 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -64,12 +64,12 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, struct flowi fl; memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; + fl.flowi_proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); final_p = fl6_update_dst(&fl, np->opt, &final); ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; + fl.flowi_oif = sk->sk_bound_dev_if; + fl.flowi_mark = sk->sk_mark; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_rsk(req)->loc_port; security_req_classify_flow(req, &fl); @@ -213,13 +213,13 @@ int inet6_csk_xmit(struct sk_buff *skb) struct in6_addr *final_p, final; memset(&fl, 0, sizeof(fl)); - fl.proto = sk->sk_protocol; + fl.flowi_proto = sk->sk_protocol; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.fl6_flowlabel = np->flow_label; IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; + fl.flowi_oif = sk->sk_bound_dev_if; + fl.flowi_mark = sk->sk_mark; fl.fl_ip_sport = inet->inet_sport; fl.fl_ip_dport = inet->inet_dport; security_sk_classify_flow(sk, &fl); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 1365468..c8fa470 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -358,7 +358,7 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, msg.msg_controllen = olen; msg.msg_control = (void*)(fl->opt+1); - flowi.oif = 0; + flowi.flowi_oif = 0; err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk, &junk, &junk); 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index adaffaf..3d0f2ac 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -182,7 +182,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, struct in6_addr *first_hop = &fl->fl6_dst; struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr; - u8 proto = fl->proto; + u8 proto = fl->flowi_proto; int seg_len = skb->len; int hlimit = -1; int tclass = 0; @@ -908,7 +908,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, #ifdef CONFIG_IPV6_SUBTREES ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) || #endif - (fl->oif && fl->oif != dst->dev->ifindex)) { + (fl->flowi_oif && fl->flowi_oif != dst->dev->ifindex)) { dst_release(dst); dst = NULL; } @@ -1026,7 +1026,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi *fl, if (final_dst) ipv6_addr_copy(&fl->fl6_dst, final_dst); if (can_sleep) - fl->flags |= FLOWI_FLAG_CAN_SLEEP; + fl->flowi_flags |= FLOWI_FLAG_CAN_SLEEP; return xfrm_lookup(sock_net(sk), dst, fl, sk, 0); } @@ -1062,7 +1062,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi *fl, if (final_dst) ipv6_addr_copy(&fl->fl6_dst, final_dst); if (can_sleep) - fl->flags |= FLOWI_FLAG_CAN_SLEEP; + fl->flowi_flags |= FLOWI_FLAG_CAN_SLEEP; return xfrm_lookup(sock_net(sk), dst, fl, sk, 0); } @@ -1517,7 +1517,7 @@ int ip6_push_pending_frames(struct sock *sk) struct ipv6_txoptions *opt = np->cork.opt; struct rt6_info *rt = (struct rt6_info *)inet->cork.dst; struct flowi *fl = &inet->cork.fl; - unsigned char proto = fl->proto; + unsigned char proto = fl->flowi_proto; int err = 0; if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index f199b84..c3fc824 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -963,7 +963,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb->transport_header = skb->network_header; - proto = fl->proto; + proto = 
fl->flowi_proto; if (encap_limit >= 0) { init_tel_txopt(&opt, encap_limit); ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); @@ -1020,7 +1020,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) encap_limit = t->parms.encap_limit; memcpy(&fl, &t->fl, sizeof (fl)); - fl.proto = IPPROTO_IPIP; + fl.flowi_proto = IPPROTO_IPIP; dsfield = ipv4_get_dsfield(iph); @@ -1070,7 +1070,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) encap_limit = t->parms.encap_limit; memcpy(&fl, &t->fl, sizeof (fl)); - fl.proto = IPPROTO_IPV6; + fl.flowi_proto = IPPROTO_IPV6; dsfield = ipv6_get_dsfield(ipv6h); if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) @@ -1149,7 +1149,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) /* Set up flowi template */ ipv6_addr_copy(&fl->fl6_src, &p->laddr); ipv6_addr_copy(&fl->fl6_dst, &p->raddr); - fl->oif = p->link; + fl->flowi_oif = p->link; fl->fl6_flowlabel = 0; if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 618f67cc..61a8be3 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -618,8 +618,8 @@ static int pim6_rcv(struct sk_buff *skb) struct net *net = dev_net(skb->dev); struct mr6_table *mrt; struct flowi fl = { - .iif = skb->dev->ifindex, - .mark = skb->mark, + .flowi_iif = skb->dev->ifindex, + .flowi_mark = skb->mark, }; int reg_vif_num; @@ -688,9 +688,9 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net *net = dev_net(dev); struct mr6_table *mrt; struct flowi fl = { - .oif = dev->ifindex, - .iif = skb->skb_iif, - .mark = skb->mark, + .flowi_oif = dev->ifindex, + .flowi_iif = skb->skb_iif, + .flowi_mark = skb->mark, }; int err; @@ -1548,9 +1548,9 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) { struct mr6_table *mrt; struct flowi fl = { - .iif = skb->skb_iif, - .oif = skb->dev->ifindex, - .mark = skb->mark, + .flowi_iif = skb->skb_iif, + .flowi_oif = skb->dev->ifindex, + .flowi_mark= skb->mark, }; if (ip6mr_fib_lookup(net, 
&fl, &mrt) < 0) @@ -1916,7 +1916,7 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, ipv6h = ipv6_hdr(skb); fl = (struct flowi) { - .oif = vif->link, + .flowi_oif = vif->link, .fl6_dst = ipv6h->daddr, }; @@ -2044,8 +2044,8 @@ int ip6_mr_input(struct sk_buff *skb) struct net *net = dev_net(skb->dev); struct mr6_table *mrt; struct flowi fl = { - .iif = skb->dev->ifindex, - .mark = skb->mark, + .flowi_iif = skb->dev->ifindex, + .flowi_mark= skb->mark, }; int err; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index d1770e0..1448c50 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -448,8 +448,8 @@ sticky_done: int junk; fl.fl6_flowlabel = 0; - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; + fl.flowi_oif = sk->sk_bound_dev_if; + fl.flowi_mark = sk->sk_mark; if (optlen == 0) goto update; diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index f3e3ca9..e2f852c 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -214,7 +214,7 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct timeval stamp; int err = 0; - if (unlikely(fl->proto == IPPROTO_MH && + if (unlikely(fl->flowi_proto == IPPROTO_MH && fl->fl_mh_type <= IP6_MH_TYPE_MAX)) goto out; @@ -240,14 +240,14 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, sizeof(sel.saddr)); sel.prefixlen_s = 128; sel.family = AF_INET6; - sel.proto = fl->proto; + sel.proto = fl->flowi_proto; sel.dport = xfrm_flowi_dport(fl); if (sel.dport) sel.dport_mask = htons(~0); sel.sport = xfrm_flowi_sport(fl); if (sel.sport) sel.sport_mask = htons(~0); - sel.ifindex = fl->oif; + sel.ifindex = fl->flowi_oif; err = km_report(net, IPPROTO_DSTOPTS, &sel, (hao ? 
(xfrm_address_t *)&hao->addr : NULL)); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 8d74116..d282c62 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -16,8 +16,8 @@ int ip6_route_me_harder(struct sk_buff *skb) struct ipv6hdr *iph = ipv6_hdr(skb); struct dst_entry *dst; struct flowi fl = { - .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, - .mark = skb->mark, + .flowi_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, + .flowi_mark = skb->mark, .fl6_dst = iph->daddr, .fl6_src = iph->saddr, }; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 91f6a61..fd39388 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -90,7 +90,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) } memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; + fl.flowi_proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_src, &oip6h->daddr); ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr); fl.fl_ip_sport = otcph.dest; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index dc29b07..323ad44 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -588,9 +588,9 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, csum = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, - total_len, fl->proto, tmp_csum); + total_len, fl->flowi_proto, tmp_csum); - if (csum == 0 && fl->proto == IPPROTO_UDP) + if (csum == 0 && fl->flowi_proto == IPPROTO_UDP) csum = CSUM_MANGLED_0; if (skb_store_bits(skb, offset, &csum, 2)) @@ -679,7 +679,7 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) if (!iov) continue; - switch (fl->proto) { + switch (fl->flowi_proto) { case IPPROTO_ICMPV6: /* check if one-byte field is readable or not. 
*/ if (iov->iov_base && iov->iov_len < 1) @@ -758,7 +758,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, */ memset(&fl, 0, sizeof(fl)); - fl.mark = sk->sk_mark; + fl.flowi_mark = sk->sk_mark; if (sin6) { if (addr_len < SIN6_LEN_RFC2133) @@ -800,7 +800,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) - fl.oif = sin6->sin6_scope_id; + fl.flowi_oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; @@ -810,8 +810,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, fl.fl6_flowlabel = np->flow_label; } - if (fl.oif == 0) - fl.oif = sk->sk_bound_dev_if; + if (fl.flowi_oif == 0) + fl.flowi_oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { opt = &opt_space; @@ -838,7 +838,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); - fl.proto = proto; + fl.flowi_proto = proto; err = rawv6_probe_proto_opt(&fl, msg); if (err) goto out; @@ -852,8 +852,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, final_p = fl6_update_dst(&fl, opt, &final); - if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) - fl.oif = np->mcast_oif; + if (!fl.flowi_oif && ipv6_addr_is_multicast(&fl.fl6_dst)) + fl.flowi_oif = np->mcast_oif; security_sk_classify_flow(sk, &fl); dst = ip6_dst_lookup_flow(sk, &fl, final_p, true); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0012760..c3b20d6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -608,7 +608,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: rt = fn->leaf; - rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags); + rt = rt6_device_match(net, rt, &fl->fl6_src, fl->flowi_oif, flags); BACKTRACK(net, &fl->fl6_src); out: 
dst_use(&rt->dst, jiffies); @@ -621,7 +621,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, int strict) { struct flowi fl = { - .oif = oif, + .flowi_oif = oif, .fl6_dst = *daddr, }; struct dst_entry *dst; @@ -825,7 +825,7 @@ out2: static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, struct flowi *fl, int flags) { - return ip6_pol_route(net, table, fl->iif, fl, flags); + return ip6_pol_route(net, table, fl->flowi_iif, fl, flags); } void ip6_route_input(struct sk_buff *skb) @@ -834,12 +834,12 @@ void ip6_route_input(struct sk_buff *skb) struct net *net = dev_net(skb->dev); int flags = RT6_LOOKUP_F_HAS_SADDR; struct flowi fl = { - .iif = skb->dev->ifindex, + .flowi_iif = skb->dev->ifindex, .fl6_dst = iph->daddr, .fl6_src = iph->saddr, .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, - .mark = skb->mark, - .proto = iph->nexthdr, + .flowi_mark = skb->mark, + .flowi_proto = iph->nexthdr, }; if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG) @@ -851,7 +851,7 @@ void ip6_route_input(struct sk_buff *skb) static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, struct flowi *fl, int flags) { - return ip6_pol_route(net, table, fl->oif, fl, flags); + return ip6_pol_route(net, table, fl->flowi_oif, fl, flags); } struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, @@ -1484,7 +1484,7 @@ restart: continue; if (!(rt->rt6i_flags & RTF_GATEWAY)) continue; - if (fl->oif != rt->rt6i_dev->ifindex) + if (fl->flowi_oif != rt->rt6i_dev->ifindex) continue; if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) continue; @@ -1511,7 +1511,7 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, struct net *net = dev_net(dev); struct ip6rd_flowi rdfl = { .fl = { - .oif = dev->ifindex, + .flowi_oif = dev->ifindex, .fl6_dst = *dest, .fl6_src = *src, }, @@ -2413,7 +2413,7 @@ static int 
inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void iif = nla_get_u32(tb[RTA_IIF]); if (tb[RTA_OIF]) - fl.oif = nla_get_u32(tb[RTA_OIF]); + fl.flowi_oif = nla_get_u32(tb[RTA_OIF]); if (iif) { struct net_device *dev; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 0b4cf35..ca5255c 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -234,12 +234,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) struct in6_addr *final_p, final; struct flowi fl; memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; + fl.flowi_proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); final_p = fl6_update_dst(&fl, np->opt, &final); ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; + fl.flowi_oif = sk->sk_bound_dev_if; + fl.flowi_mark = sk->sk_mark; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->inet_sport; security_req_classify_flow(req, &fl); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e59a31c..a3d1229 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -242,12 +242,12 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (!ipv6_addr_any(&np->rcv_saddr)) saddr = &np->rcv_saddr; - fl.proto = IPPROTO_TCP; + fl.flowi_proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, (saddr ? saddr : &np->saddr)); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; + fl.flowi_oif = sk->sk_bound_dev_if; + fl.flowi_mark = sk->sk_mark; fl.fl_ip_dport = usin->sin6_port; fl.fl_ip_sport = inet->inet_sport; @@ -396,11 +396,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, for now. 
*/ memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; + fl.flowi_proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.oif = sk->sk_bound_dev_if; - fl.mark = sk->sk_mark; + fl.flowi_oif = sk->sk_bound_dev_if; + fl.flowi_mark = sk->sk_mark; fl.fl_ip_dport = inet->inet_dport; fl.fl_ip_sport = inet->inet_sport; security_skb_classify_flow(skb, &fl); @@ -487,12 +487,12 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, int err; memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_TCP; + fl.flowi_proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); fl.fl6_flowlabel = 0; - fl.oif = treq->iif; - fl.mark = sk->sk_mark; + fl.flowi_oif = treq->iif; + fl.flowi_mark = sk->sk_mark; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_rsk(req)->loc_port; security_req_classify_flow(req, &fl); @@ -1055,8 +1055,8 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, __tcp_v6_send_check(buff, &fl.fl6_src, &fl.fl6_dst); - fl.proto = IPPROTO_TCP; - fl.oif = inet6_iif(skb); + fl.flowi_proto = IPPROTO_TCP; + fl.flowi_oif = inet6_iif(skb); fl.fl_ip_dport = t1->dest; fl.fl_ip_sport = t1->source; security_skb_classify_flow(skb, &fl); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d86d7f6..91f8047 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -915,7 +915,7 @@ static int udp_v6_push_pending_frames(struct sock *sk) /* add protocol-dependent pseudo-header */ uh->check = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, - up->len, fl->proto, csum ); + up->len, fl->flowi_proto, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; @@ -1060,7 +1060,7 @@ do_udp_sendmsg: if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) - fl.oif = sin6->sin6_scope_id; + fl.flowi_oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; 
@@ -1071,13 +1071,13 @@ do_udp_sendmsg: connected = 1; } - if (!fl.oif) - fl.oif = sk->sk_bound_dev_if; + if (!fl.flowi_oif) + fl.flowi_oif = sk->sk_bound_dev_if; - if (!fl.oif) - fl.oif = np->sticky_pktinfo.ipi6_ifindex; + if (!fl.flowi_oif) + fl.flowi_oif = np->sticky_pktinfo.ipi6_ifindex; - fl.mark = sk->sk_mark; + fl.flowi_mark = sk->sk_mark; if (msg->msg_controllen) { opt = &opt_space; @@ -1105,7 +1105,7 @@ do_udp_sendmsg: opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); - fl.proto = sk->sk_protocol; + fl.flowi_proto = sk->sk_protocol; if (!ipv6_addr_any(daddr)) ipv6_addr_copy(&fl.fl6_dst, daddr); else @@ -1118,8 +1118,8 @@ do_udp_sendmsg: if (final_p) connected = 0; - if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) { - fl.oif = np->mcast_oif; + if (!fl.flowi_oif && ipv6_addr_is_multicast(&fl.fl6_dst)) { + fl.flowi_oif = np->mcast_oif; connected = 0; } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 48ce496..d62496c 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -128,7 +128,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) u8 nexthdr = nh[IP6CB(skb)->nhoff]; memset(fl, 0, sizeof(struct flowi)); - fl->mark = skb->mark; + fl->flowi_mark = skb->mark; ipv6_addr_copy(&fl->fl6_dst, reverse ? &hdr->saddr : &hdr->daddr); ipv6_addr_copy(&fl->fl6_src, reverse ? 
&hdr->daddr : &hdr->saddr); @@ -161,7 +161,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) fl->fl_ip_sport = ports[!!reverse]; fl->fl_ip_dport = ports[!reverse]; } - fl->proto = nexthdr; + fl->flowi_proto = nexthdr; return; case IPPROTO_ICMPV6: @@ -171,7 +171,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) fl->fl_icmp_type = icmp[0]; fl->fl_icmp_code = icmp[1]; } - fl->proto = nexthdr; + fl->flowi_proto = nexthdr; return; #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) @@ -182,7 +182,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) fl->fl_mh_type = mh->ip6mh_type; } - fl->proto = nexthdr; + fl->flowi_proto = nexthdr; return; #endif @@ -192,7 +192,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_COMP: default: fl->fl_ipsec_spi = 0; - fl->proto = nexthdr; + fl->flowi_proto = nexthdr; return; } } diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index a02598e..805d0e1 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -33,8 +33,8 @@ __xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl) sel->family = AF_INET6; sel->prefixlen_d = 128; sel->prefixlen_s = 128; - sel->proto = fl->proto; - sel->ifindex = fl->oif; + sel->proto = fl->flowi_proto; + sel->ifindex = fl->flowi_oif; } static void diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index d69ec26..d07a32a 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -76,7 +76,7 @@ static int __ip_vs_addr_is_local_v6(struct net *net, { struct rt6_info *rt; struct flowi fl = { - .oif = 0, + .flowi_oif = 0, .fl6_dst = *addr, .fl6_src = { .s6_addr32 = {0, 0, 0, 0} }, }; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index faf381d..cc8071f 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -169,7 +169,7 @@ 
__ip_vs_reroute_locally(struct sk_buff *skb) .fl4_dst = iph->daddr, .fl4_src = iph->saddr, .fl4_tos = RT_TOS(iph->tos), - .mark = skb->mark, + .flowi_mark = skb->mark, }; rt = ip_route_output_key(net, &fl); diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 624725b..cb14ae2 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -68,7 +68,7 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info) if (info->priv) { if (info->priv->oif == -1) return false; - fl.oif = info->priv->oif; + fl.flowi_oif = info->priv->oif; } fl.fl4_dst = info->gw.ip; fl.fl4_tos = RT_TOS(iph->tos); @@ -149,7 +149,7 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info) if (info->priv) { if (info->priv->oif == -1) return false; - fl.oif = info->priv->oif; + fl.flowi_oif = info->priv->oif; } fl.fl6_dst = info->gw.in6; fl.fl6_flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 95e0c8e..8316271 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -205,7 +205,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) memset(&fl, 0, sizeof(fl)); - fl.proto = sk->sk_protocol; + fl.flowi_proto = sk->sk_protocol; /* Fill in the dest address from the route entry passed with the skb * and the source address from the transport. 
@@ -216,9 +216,9 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) fl.fl6_flowlabel = np->flow_label; IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); if (ipv6_addr_type(&fl.fl6_src) & IPV6_ADDR_LINKLOCAL) - fl.oif = transport->saddr.v6.sin6_scope_id; + fl.flowi_oif = transport->saddr.v6.sin6_scope_id; else - fl.oif = sk->sk_bound_dev_if; + fl.flowi_oif = sk->sk_bound_dev_if; if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; @@ -250,7 +250,7 @@ static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc, memset(&fl, 0, sizeof(fl)); ipv6_addr_copy(&fl.fl6_dst, &daddr->v6.sin6_addr); if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) - fl.oif = daddr->v6.sin6_scope_id; + fl.flowi_oif = daddr->v6.sin6_scope_id; SCTP_DEBUG_PRINTK("%s: DST=%pI6 ", __func__, &fl.fl6_dst); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 4e55e6c..832665a 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -477,10 +477,10 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, memset(&fl, 0x0, sizeof(struct flowi)); fl.fl4_dst = daddr->v4.sin_addr.s_addr; fl.fl_ip_dport = daddr->v4.sin_port; - fl.proto = IPPROTO_SCTP; + fl.flowi_proto = IPPROTO_SCTP; if (asoc) { fl.fl4_tos = RT_CONN_FLAGS(asoc->base.sk); - fl.oif = asoc->base.sk->sk_bound_dev_if; + fl.flowi_oif = asoc->base.sk->sk_bound_dev_if; fl.fl_ip_sport = htons(asoc->base.bind_addr.port); } if (saddr) { diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9e4aacd..dd6243f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -63,8 +63,8 @@ __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) && !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) && !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) && - (fl->proto == sel->proto || !sel->proto) && - (fl->oif == sel->ifindex || 
!sel->ifindex); + (fl->flowi_proto == sel->proto || !sel->proto) && + (fl->flowi_oif == sel->ifindex || !sel->ifindex); } static inline int @@ -74,8 +74,8 @@ __xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) && !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) && !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) && - (fl->proto == sel->proto || !sel->proto) && - (fl->oif == sel->ifindex || !sel->ifindex); + (fl->flowi_proto == sel->proto || !sel->proto) && + (fl->flowi_oif == sel->ifindex || !sel->ifindex); } int xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl, @@ -876,13 +876,13 @@ static int xfrm_policy_match(const struct xfrm_policy *pol, int match, ret = -ESRCH; if (pol->family != family || - (fl->mark & pol->mark.m) != pol->mark.v || + (fl->flowi_mark & pol->mark.m) != pol->mark.v || pol->type != type) return ret; match = xfrm_selector_match(sel, fl, family); if (match) - ret = security_xfrm_policy_lookup(pol->security, fl->secid, + ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid, dir); return ret; @@ -1012,7 +1012,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, goto out; } err = security_xfrm_policy_lookup(pol->security, - fl->secid, + fl->flowi_secid, policy_to_flow_dir(dir)); if (!err) xfrm_pol_hold(pol); @@ -1848,7 +1848,7 @@ restart: return make_blackhole(net, family, dst_orig); } - if (fl->flags & FLOWI_FLAG_CAN_SLEEP) { + if (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP) { DECLARE_WAITQUEUE(wait, current); add_wait_queue(&net->xfrm.km_waitq, &wait); @@ -1990,7 +1990,7 @@ int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, return -EAFNOSUPPORT; afinfo->decode_session(skb, fl, reverse); - err = security_xfrm_decode_session(skb, &fl->secid); + err = security_xfrm_decode_session(skb, &fl->flowi_secid); xfrm_policy_put_afinfo(afinfo); return err; } diff --git 
a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 81221d9..cd6be49 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -859,7 +859,7 @@ found: xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family); memcpy(&x->mark, &pol->mark, sizeof(x->mark)); - error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid); + error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid); if (error) { x->km.state = XFRM_STATE_DEAD; to_put = x; -- cgit v1.1 From 6281dcc94a96bd73017b2baa8fa83925405109ef Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 12 Mar 2011 00:43:55 -0500 Subject: net: Make flowi ports AF dependent. Create two sets of port member accessors, one set prefixed by fl4_* and the other prefixed by fl6_* This will let us to create AF optimal flow instances. It will work because every context in which we access the ports, we have to be fully aware of which AF the flowi is anyways. Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 4 ++-- net/dccp/ipv6.c | 20 ++++++++++---------- net/ipv4/icmp.c | 4 ++-- net/ipv4/inet_connection_sock.c | 4 ++-- net/ipv4/ip_output.c | 4 ++-- net/ipv4/netfilter/nf_nat_standalone.c | 4 ++-- net/ipv4/raw.c | 4 ++-- net/ipv4/syncookies.c | 4 ++-- net/ipv4/udp.c | 10 +++++----- net/ipv4/xfrm4_policy.c | 18 +++++++++--------- net/ipv4/xfrm4_state.c | 4 ++-- net/ipv6/af_inet6.c | 4 ++-- net/ipv6/datagram.c | 6 +++--- net/ipv6/icmp.c | 10 +++++----- net/ipv6/inet6_connection_sock.c | 8 ++++---- net/ipv6/mip6.c | 6 +++--- net/ipv6/netfilter/ip6t_REJECT.c | 4 ++-- net/ipv6/raw.c | 6 +++--- net/ipv6/syncookies.c | 4 ++-- net/ipv6/tcp_ipv6.c | 16 ++++++++-------- net/ipv6/udp.c | 10 +++++----- net/ipv6/xfrm6_policy.c | 12 ++++++------ net/ipv6/xfrm6_state.c | 4 ++-- net/sctp/protocol.c | 8 ++++---- net/xfrm/xfrm_policy.c | 8 ++++---- 25 files changed, 93 insertions(+), 93 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 09a0991..d934b20 100644 --- 
a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -471,8 +471,8 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, .fl4_src = ip_hdr(skb)->daddr, .fl4_tos = RT_CONN_FLAGS(sk), .flowi_proto = sk->sk_protocol, - .fl_ip_sport = dccp_hdr(skb)->dccph_dport, - .fl_ip_dport = dccp_hdr(skb)->dccph_sport, + .fl4_sport = dccp_hdr(skb)->dccph_dport, + .fl4_dport = dccp_hdr(skb)->dccph_sport, }; security_skb_classify_flow(skb, &fl); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 5209ee7..2b351c6 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -158,8 +158,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.flowi_oif = sk->sk_bound_dev_if; - fl.fl_ip_dport = inet->inet_dport; - fl.fl_ip_sport = inet->inet_sport; + fl.fl6_dport = inet->inet_dport; + fl.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, &fl); dst = ip6_dst_lookup_flow(sk, &fl, NULL, false); @@ -253,8 +253,8 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); fl.fl6_flowlabel = 0; fl.flowi_oif = ireq6->iif; - fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_rsk(req)->loc_port; + fl.fl6_dport = inet_rsk(req)->rmt_port; + fl.fl6_sport = inet_rsk(req)->loc_port; security_req_classify_flow(req, &fl); opt = np->opt; @@ -323,8 +323,8 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) fl.flowi_proto = IPPROTO_DCCP; fl.flowi_oif = inet6_iif(rxskb); - fl.fl_ip_dport = dccp_hdr(skb)->dccph_dport; - fl.fl_ip_sport = dccp_hdr(skb)->dccph_sport; + fl.fl6_dport = dccp_hdr(skb)->dccph_dport; + fl.fl6_sport = dccp_hdr(skb)->dccph_sport; security_skb_classify_flow(rxskb, &fl); /* sk = NULL, but it is safe for now. RST socket required. 
*/ @@ -535,8 +535,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, final_p = fl6_update_dst(&fl, opt, &final); ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); fl.flowi_oif = sk->sk_bound_dev_if; - fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_rsk(req)->loc_port; + fl.fl6_dport = inet_rsk(req)->rmt_port; + fl.fl6_sport = inet_rsk(req)->loc_port; security_sk_classify_flow(sk, &fl); dst = ip6_dst_lookup_flow(sk, &fl, final_p, false); @@ -957,8 +957,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ipv6_addr_copy(&fl.fl6_dst, &np->daddr); ipv6_addr_copy(&fl.fl6_src, saddr ? saddr : &np->saddr); fl.flowi_oif = sk->sk_bound_dev_if; - fl.fl_ip_dport = usin->sin6_port; - fl.fl_ip_sport = inet->inet_sport; + fl.fl6_dport = usin->sin6_port; + fl.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, &fl); final_p = fl6_update_dst(&fl, np->opt, &final); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 3fde7f2..8d09195 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -384,8 +384,8 @@ static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in, .fl4_src = saddr, .fl4_tos = RT_TOS(tos), .flowi_proto = IPPROTO_ICMP, - .fl_icmp_type = type, - .fl_icmp_code = code, + .fl4_icmp_type = type, + .fl4_icmp_code = code, }; struct rtable *rt, *rt2; int err; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 9708170..10a8e95 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -365,8 +365,8 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, .fl4_tos = RT_CONN_FLAGS(sk), .flowi_proto = sk->sk_protocol, .flowi_flags = inet_sk_flowi_flags(sk), - .fl_ip_sport = inet_sk(sk)->inet_sport, - .fl_ip_dport = ireq->rmt_port, + .fl4_sport = inet_sk(sk)->inet_sport, + .fl4_dport = ireq->rmt_port, }; struct net *net = sock_net(sk); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e35ca40..67e5f71 100644 --- 
a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1479,8 +1479,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar .fl4_dst = daddr, .fl4_src = rt->rt_spec_dst, .fl4_tos = RT_TOS(ip_hdr(skb)->tos), - .fl_ip_sport = tcp_hdr(skb)->dest, - .fl_ip_dport = tcp_hdr(skb)->source, + .fl4_sport = tcp_hdr(skb)->dest, + .fl4_dport = tcp_hdr(skb)->source, .flowi_proto = sk->sk_protocol, .flowi_flags = ip_reply_arg_flowi_flags(arg), }; diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 95481fe..1f3c695 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -55,7 +55,7 @@ static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) t->dst.protonum == IPPROTO_UDPLITE || t->dst.protonum == IPPROTO_DCCP || t->dst.protonum == IPPROTO_SCTP) - fl->fl_ip_dport = t->dst.u.tcp.port; + fl->fl4_dport = t->dst.u.tcp.port; } statusbit ^= IPS_NAT_MASK; @@ -67,7 +67,7 @@ static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) t->dst.protonum == IPPROTO_UDPLITE || t->dst.protonum == IPPROTO_DCCP || t->dst.protonum == IPPROTO_SCTP) - fl->fl_ip_sport = t->src.u.tcp.port; + fl->fl4_sport = t->src.u.tcp.port; } } #endif diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index b42b7cd..333b826 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -433,8 +433,8 @@ static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) code = iov->iov_base; if (type && code) { - if (get_user(fl->fl_icmp_type, type) || - get_user(fl->fl_icmp_code, code)) + if (get_user(fl->fl4_icmp_type, type) || + get_user(fl->fl4_icmp_code, code)) return -EFAULT; probed = 1; } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 98d47dc..d90529d 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -353,8 +353,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, .fl4_tos = RT_CONN_FLAGS(sk), .flowi_proto = IPPROTO_TCP, .flowi_flags = 
inet_sk_flowi_flags(sk), - .fl_ip_sport = th->dest, - .fl_ip_dport = th->source, + .fl4_sport = th->dest, + .fl4_dport = th->source, }; security_req_classify_flow(req, &fl); rt = ip_route_output_key(sock_net(sk), &fl); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e10f62e..116e4a8 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -778,7 +778,7 @@ static int udp_push_pending_frames(struct sock *sk) if (!skb) goto out; - err = udp_send_skb(skb, fl->fl4_dst, fl->fl_ip_dport); + err = udp_send_skb(skb, fl->fl4_dst, fl->fl4_dport); out: up->len = 0; @@ -917,8 +917,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, .flowi_proto = sk->sk_protocol, .flowi_flags = (inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP), - .fl_ip_sport = inet->inet_sport, - .fl_ip_dport = dport, + .fl4_sport = inet->inet_sport, + .fl4_dport = dport, }; struct net *net = sock_net(sk); @@ -973,9 +973,9 @@ back_from_confirm: * Now cork the socket to pend data. */ inet->cork.fl.fl4_dst = daddr; - inet->cork.fl.fl_ip_dport = dport; + inet->cork.fl.fl4_dport = dport; inet->cork.fl.fl4_src = saddr; - inet->cork.fl.fl_ip_sport = inet->inet_sport; + inet->cork.fl.fl4_sport = inet->inet_sport; up->pending = AF_INET; do_append_data: diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 4294f12..b7b0921 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -117,8 +117,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, xprth + 4 - skb->data)) { __be16 *ports = (__be16 *)xprth; - fl->fl_ip_sport = ports[!!reverse]; - fl->fl_ip_dport = ports[!reverse]; + fl->fl4_sport = ports[!!reverse]; + fl->fl4_dport = ports[!reverse]; } break; @@ -126,8 +126,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) if (pskb_may_pull(skb, xprth + 2 - skb->data)) { u8 *icmp = xprth; - fl->fl_icmp_type = icmp[0]; - fl->fl_icmp_code = icmp[1]; + fl->fl4_icmp_type = icmp[0]; + fl->fl4_icmp_code = icmp[1]; } 
break; @@ -135,7 +135,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) if (pskb_may_pull(skb, xprth + 4 - skb->data)) { __be32 *ehdr = (__be32 *)xprth; - fl->fl_ipsec_spi = ehdr[0]; + fl->fl4_ipsec_spi = ehdr[0]; } break; @@ -143,7 +143,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) if (pskb_may_pull(skb, xprth + 8 - skb->data)) { __be32 *ah_hdr = (__be32*)xprth; - fl->fl_ipsec_spi = ah_hdr[1]; + fl->fl4_ipsec_spi = ah_hdr[1]; } break; @@ -151,7 +151,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) if (pskb_may_pull(skb, xprth + 4 - skb->data)) { __be16 *ipcomp_hdr = (__be16 *)xprth; - fl->fl_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); + fl->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); } break; @@ -163,13 +163,13 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) if (greflags[0] & GRE_KEY) { if (greflags[0] & GRE_CSUM) gre_hdr++; - fl->fl_gre_key = gre_hdr[1]; + fl->fl4_gre_key = gre_hdr[1]; } } break; default: - fl->fl_ipsec_spi = 0; + fl->fl4_ipsec_spi = 0; break; } } diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index d231434..663b550 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -25,9 +25,9 @@ __xfrm4_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl) { sel->daddr.a4 = fl->fl4_dst; sel->saddr.a4 = fl->fl4_src; - sel->dport = xfrm_flowi_dport(fl); + sel->dport = xfrm_flowi_dport(fl, &fl->uli_u); sel->dport_mask = htons(0xffff); - sel->sport = xfrm_flowi_sport(fl); + sel->sport = xfrm_flowi_sport(fl, &fl->uli_u); sel->sport_mask = htons(0xffff); sel->family = AF_INET; sel->prefixlen_d = 32; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 35b0be0..923febe 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -661,8 +661,8 @@ int inet6_sk_rebuild_header(struct sock *sk) fl.fl6_flowlabel = np->flow_label; fl.flowi_oif = sk->sk_bound_dev_if; fl.flowi_mark = sk->sk_mark; - fl.fl_ip_dport = 
inet->inet_dport; - fl.fl_ip_sport = inet->inet_sport; + fl.fl6_dport = inet->inet_dport; + fl.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, &fl); final_p = fl6_update_dst(&fl, np->opt, &final); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 6c24b26..07e03e6 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -151,8 +151,8 @@ ipv4_connected: ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.flowi_oif = sk->sk_bound_dev_if; fl.flowi_mark = sk->sk_mark; - fl.fl_ip_dport = inet->inet_dport; - fl.fl_ip_sport = inet->inet_sport; + fl.fl6_dport = inet->inet_dport; + fl.fl6_sport = inet->inet_sport; if (!fl.flowi_oif && (addr_type&IPV6_ADDR_MULTICAST)) fl.flowi_oif = np->mcast_oif; @@ -261,7 +261,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info) serr->ee.ee_info = info; serr->ee.ee_data = 0; serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb); - serr->port = fl->fl_ip_dport; + serr->port = fl->fl6_dport; __skb_pull(skb, skb_tail_pointer(skb) - skb->data); skb_reset_transport_header(skb); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 9e123e0..52ff7aa 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -448,8 +448,8 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) if (saddr) ipv6_addr_copy(&fl.fl6_src, saddr); fl.flowi_oif = iif; - fl.fl_icmp_type = type; - fl.fl_icmp_code = code; + fl.fl6_icmp_type = type; + fl.fl6_icmp_code = code; security_skb_classify_flow(skb, &fl); sk = icmpv6_xmit_lock(net); @@ -544,7 +544,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (saddr) ipv6_addr_copy(&fl.fl6_src, saddr); fl.flowi_oif = skb->dev->ifindex; - fl.fl_icmp_type = ICMPV6_ECHO_REPLY; + fl.fl6_icmp_type = ICMPV6_ECHO_REPLY; security_skb_classify_flow(skb, &fl); sk = icmpv6_xmit_lock(net); @@ -794,8 +794,8 @@ void icmpv6_flow_init(struct sock *sk, struct flowi *fl, ipv6_addr_copy(&fl->fl6_src, saddr); ipv6_addr_copy(&fl->fl6_dst, daddr); fl->flowi_proto = 
IPPROTO_ICMPV6; - fl->fl_icmp_type = type; - fl->fl_icmp_code = 0; + fl->fl6_icmp_type = type; + fl->fl6_icmp_code = 0; fl->flowi_oif = oif; security_sk_classify_flow(sk, fl); } diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 673f9bf..1b06a24 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -70,8 +70,8 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); fl.flowi_oif = sk->sk_bound_dev_if; fl.flowi_mark = sk->sk_mark; - fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_rsk(req)->loc_port; + fl.fl6_dport = inet_rsk(req)->rmt_port; + fl.fl6_sport = inet_rsk(req)->loc_port; security_req_classify_flow(req, &fl); dst = ip6_dst_lookup_flow(sk, &fl, final_p, false); @@ -220,8 +220,8 @@ int inet6_csk_xmit(struct sk_buff *skb) IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); fl.flowi_oif = sk->sk_bound_dev_if; fl.flowi_mark = sk->sk_mark; - fl.fl_ip_sport = inet->inet_sport; - fl.fl_ip_dport = inet->inet_dport; + fl.fl6_sport = inet->inet_sport; + fl.fl6_dport = inet->inet_dport; security_sk_classify_flow(sk, &fl); final_p = fl6_update_dst(&fl, np->opt, &final); diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index e2f852c..5038e6b 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -215,7 +215,7 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, int err = 0; if (unlikely(fl->flowi_proto == IPPROTO_MH && - fl->fl_mh_type <= IP6_MH_TYPE_MAX)) + fl->fl6_mh_type <= IP6_MH_TYPE_MAX)) goto out; if (likely(opt->dsthao)) { @@ -241,10 +241,10 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, sel.prefixlen_s = 128; sel.family = AF_INET6; sel.proto = fl->flowi_proto; - sel.dport = xfrm_flowi_dport(fl); + sel.dport = xfrm_flowi_dport(fl, &fl->uli_u); if (sel.dport) sel.dport_mask = htons(~0); - sel.sport = xfrm_flowi_sport(fl); + sel.sport = xfrm_flowi_sport(fl, &fl->uli_u); if (sel.sport) 
sel.sport_mask = htons(~0); sel.ifindex = fl->flowi_oif; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index fd39388..d1e905b 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -93,8 +93,8 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) fl.flowi_proto = IPPROTO_TCP; ipv6_addr_copy(&fl.fl6_src, &oip6h->daddr); ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr); - fl.fl_ip_sport = otcph.dest; - fl.fl_ip_dport = otcph.source; + fl.fl6_sport = otcph.dest; + fl.fl6_dport = otcph.source; security_skb_classify_flow(oldskb, &fl); dst = ip6_route_output(net, NULL, &fl); if (dst == NULL || dst->error) { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 323ad44..d061465 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -694,8 +694,8 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) code = iov->iov_base; if (type && code) { - if (get_user(fl->fl_icmp_type, type) || - get_user(fl->fl_icmp_code, code)) + if (get_user(fl->fl6_icmp_type, type) || + get_user(fl->fl6_icmp_code, code)) return -EFAULT; probed = 1; } @@ -706,7 +706,7 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) /* check if type field is readable or not. 
*/ if (iov->iov_len > 2 - len) { u8 __user *p = iov->iov_base; - if (get_user(fl->fl_mh_type, &p[2 - len])) + if (get_user(fl->fl6_mh_type, &p[2 - len])) return -EFAULT; probed = 1; } else diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index ca5255c..5b9eded 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -240,8 +240,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); fl.flowi_oif = sk->sk_bound_dev_if; fl.flowi_mark = sk->sk_mark; - fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_sk(sk)->inet_sport; + fl.fl6_dport = inet_rsk(req)->rmt_port; + fl.fl6_sport = inet_sk(sk)->inet_sport; security_req_classify_flow(req, &fl); dst = ip6_dst_lookup_flow(sk, &fl, final_p, false); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a3d1229..c531ad5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -248,8 +248,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, (saddr ? 
saddr : &np->saddr)); fl.flowi_oif = sk->sk_bound_dev_if; fl.flowi_mark = sk->sk_mark; - fl.fl_ip_dport = usin->sin6_port; - fl.fl_ip_sport = inet->inet_sport; + fl.fl6_dport = usin->sin6_port; + fl.fl6_sport = inet->inet_sport; final_p = fl6_update_dst(&fl, np->opt, &final); @@ -401,8 +401,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ipv6_addr_copy(&fl.fl6_src, &np->saddr); fl.flowi_oif = sk->sk_bound_dev_if; fl.flowi_mark = sk->sk_mark; - fl.fl_ip_dport = inet->inet_dport; - fl.fl_ip_sport = inet->inet_sport; + fl.fl6_dport = inet->inet_dport; + fl.fl6_sport = inet->inet_sport; security_skb_classify_flow(skb, &fl); dst = ip6_dst_lookup_flow(sk, &fl, NULL, false); @@ -493,8 +493,8 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, fl.fl6_flowlabel = 0; fl.flowi_oif = treq->iif; fl.flowi_mark = sk->sk_mark; - fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_rsk(req)->loc_port; + fl.fl6_dport = inet_rsk(req)->rmt_port; + fl.fl6_sport = inet_rsk(req)->loc_port; security_req_classify_flow(req, &fl); opt = np->opt; @@ -1057,8 +1057,8 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, fl.flowi_proto = IPPROTO_TCP; fl.flowi_oif = inet6_iif(skb); - fl.fl_ip_dport = t1->dest; - fl.fl_ip_sport = t1->source; + fl.fl6_dport = t1->dest; + fl.fl6_sport = t1->source; security_skb_classify_flow(skb, &fl); /* Pass a socket to ip6_dst_lookup either it is for RST diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 91f8047..dad035f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -899,8 +899,8 @@ static int udp_v6_push_pending_frames(struct sock *sk) * Create a UDP header */ uh = udp_hdr(skb); - uh->source = fl->fl_ip_sport; - uh->dest = fl->fl_ip_dport; + uh->source = fl->fl6_sport; + uh->dest = fl->fl6_dport; uh->len = htons(up->len); uh->check = 0; @@ -1036,7 +1036,7 @@ do_udp_sendmsg: if (sin6->sin6_port == 0) return -EINVAL; - fl.fl_ip_dport = sin6->sin6_port; + 
fl.fl6_dport = sin6->sin6_port; daddr = &sin6->sin6_addr; if (np->sndflow) { @@ -1065,7 +1065,7 @@ do_udp_sendmsg: if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - fl.fl_ip_dport = inet->inet_dport; + fl.fl6_dport = inet->inet_dport; daddr = &np->daddr; fl.fl6_flowlabel = np->flow_label; connected = 1; @@ -1112,7 +1112,7 @@ do_udp_sendmsg: fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.fl_ip_sport = inet->inet_sport; + fl.fl6_sport = inet->inet_sport; final_p = fl6_update_dst(&fl, opt, &final); if (final_p) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index d62496c..213c759 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -158,8 +158,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, nh + offset + 4 - skb->data))) { __be16 *ports = (__be16 *)exthdr; - fl->fl_ip_sport = ports[!!reverse]; - fl->fl_ip_dport = ports[!reverse]; + fl->fl6_sport = ports[!!reverse]; + fl->fl6_dport = ports[!reverse]; } fl->flowi_proto = nexthdr; return; @@ -168,8 +168,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) { u8 *icmp = (u8 *)exthdr; - fl->fl_icmp_type = icmp[0]; - fl->fl_icmp_code = icmp[1]; + fl->fl6_icmp_type = icmp[0]; + fl->fl6_icmp_code = icmp[1]; } fl->flowi_proto = nexthdr; return; @@ -180,7 +180,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) struct ip6_mh *mh; mh = (struct ip6_mh *)exthdr; - fl->fl_mh_type = mh->ip6mh_type; + fl->fl6_mh_type = mh->ip6mh_type; } fl->flowi_proto = nexthdr; return; @@ -191,7 +191,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_ESP: case IPPROTO_COMP: default: - fl->fl_ipsec_spi = 0; + fl->fl6_ipsec_spi = 0; fl->flowi_proto = nexthdr; return; } diff --git 
a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 805d0e1..71277ce 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -26,9 +26,9 @@ __xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl) * to current session. */ ipv6_addr_copy((struct in6_addr *)&sel->daddr, &fl->fl6_dst); ipv6_addr_copy((struct in6_addr *)&sel->saddr, &fl->fl6_src); - sel->dport = xfrm_flowi_dport(fl); + sel->dport = xfrm_flowi_dport(fl, &fl->uli_u); sel->dport_mask = htons(0xffff); - sel->sport = xfrm_flowi_sport(fl); + sel->sport = xfrm_flowi_sport(fl, &fl->uli_u); sel->sport_mask = htons(0xffff); sel->family = AF_INET6; sel->prefixlen_d = 128; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 832665a..b6fa294 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -476,16 +476,16 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, memset(&fl, 0x0, sizeof(struct flowi)); fl.fl4_dst = daddr->v4.sin_addr.s_addr; - fl.fl_ip_dport = daddr->v4.sin_port; + fl.fl4_dport = daddr->v4.sin_port; fl.flowi_proto = IPPROTO_SCTP; if (asoc) { fl.fl4_tos = RT_CONN_FLAGS(asoc->base.sk); fl.flowi_oif = asoc->base.sk->sk_bound_dev_if; - fl.fl_ip_sport = htons(asoc->base.bind_addr.port); + fl.fl4_sport = htons(asoc->base.bind_addr.port); } if (saddr) { fl.fl4_src = saddr->v4.sin_addr.s_addr; - fl.fl_ip_sport = saddr->v4.sin_port; + fl.fl4_sport = saddr->v4.sin_port; } SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ", @@ -534,7 +534,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, if ((laddr->state == SCTP_ADDR_SRC) && (AF_INET == laddr->a.sa.sa_family)) { fl.fl4_src = laddr->a.v4.sin_addr.s_addr; - fl.fl_ip_sport = laddr->a.v4.sin_port; + fl.fl4_sport = laddr->a.v4.sin_port; rt = ip_route_output_key(&init_net, &fl); if (!IS_ERR(rt)) { dst = &rt->dst; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index dd6243f..d54b6e7 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -61,8 +61,8 
@@ __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) { return addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) && addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) && - !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) && - !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) && + !((xfrm_flowi_dport(fl, &fl->uli_u) ^ sel->dport) & sel->dport_mask) && + !((xfrm_flowi_sport(fl, &fl->uli_u) ^ sel->sport) & sel->sport_mask) && (fl->flowi_proto == sel->proto || !sel->proto) && (fl->flowi_oif == sel->ifindex || !sel->ifindex); } @@ -72,8 +72,8 @@ __xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) { return addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) && addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) && - !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) && - !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) && + !((xfrm_flowi_dport(fl, &fl->uli_u) ^ sel->dport) & sel->dport_mask) && + !((xfrm_flowi_sport(fl, &fl->uli_u) ^ sel->sport) & sel->sport_mask) && (fl->flowi_proto == sel->proto || !sel->proto) && (fl->flowi_oif == sel->ifindex || !sel->ifindex); } -- cgit v1.1 From 56bb8059e1a8bf291054c26367564dc302f6fd8f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 12 Mar 2011 00:44:35 -0500 Subject: net: Break struct flowi out into AF specific instances. Now we have struct flowi4, flowi6, and flowidn for each address family. And struct flowi is just a union of them all. It might have been troublesome to convert flow_cache_uli_match() but as it turns out this function is completely unused and therefore can be simply removed. Signed-off-by: David S. 
Miller --- net/ipv4/xfrm4_state.c | 4 ++-- net/ipv6/mip6.c | 4 ++-- net/ipv6/xfrm6_state.c | 4 ++-- net/xfrm/xfrm_policy.c | 8 ++++---- 4 files changed, 10 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 663b550..d8d5419 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -25,9 +25,9 @@ __xfrm4_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl) { sel->daddr.a4 = fl->fl4_dst; sel->saddr.a4 = fl->fl4_src; - sel->dport = xfrm_flowi_dport(fl, &fl->uli_u); + sel->dport = xfrm_flowi_dport(fl, &fl->u.ip4.uli); sel->dport_mask = htons(0xffff); - sel->sport = xfrm_flowi_sport(fl, &fl->uli_u); + sel->sport = xfrm_flowi_sport(fl, &fl->u.ip4.uli); sel->sport_mask = htons(0xffff); sel->family = AF_INET; sel->prefixlen_d = 32; diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 5038e6b..e1767ae 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -241,10 +241,10 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, sel.prefixlen_s = 128; sel.family = AF_INET6; sel.proto = fl->flowi_proto; - sel.dport = xfrm_flowi_dport(fl, &fl->uli_u); + sel.dport = xfrm_flowi_dport(fl, &fl->u.ip6.uli); if (sel.dport) sel.dport_mask = htons(~0); - sel.sport = xfrm_flowi_sport(fl, &fl->uli_u); + sel.sport = xfrm_flowi_sport(fl, &fl->u.ip6.uli); if (sel.sport) sel.sport_mask = htons(~0); sel.ifindex = fl->flowi_oif; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 71277ce..b456533 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -26,9 +26,9 @@ __xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl) * to current session. 
*/ ipv6_addr_copy((struct in6_addr *)&sel->daddr, &fl->fl6_dst); ipv6_addr_copy((struct in6_addr *)&sel->saddr, &fl->fl6_src); - sel->dport = xfrm_flowi_dport(fl, &fl->uli_u); + sel->dport = xfrm_flowi_dport(fl, &fl->u.ip6.uli); sel->dport_mask = htons(0xffff); - sel->sport = xfrm_flowi_sport(fl, &fl->uli_u); + sel->sport = xfrm_flowi_sport(fl, &fl->u.ip6.uli); sel->sport_mask = htons(0xffff); sel->family = AF_INET6; sel->prefixlen_d = 128; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d54b6e7..2ecd18a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -61,8 +61,8 @@ __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) { return addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) && addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) && - !((xfrm_flowi_dport(fl, &fl->uli_u) ^ sel->dport) & sel->dport_mask) && - !((xfrm_flowi_sport(fl, &fl->uli_u) ^ sel->sport) & sel->sport_mask) && + !((xfrm_flowi_dport(fl, &fl->u.ip4.uli) ^ sel->dport) & sel->dport_mask) && + !((xfrm_flowi_sport(fl, &fl->u.ip4.uli) ^ sel->sport) & sel->sport_mask) && (fl->flowi_proto == sel->proto || !sel->proto) && (fl->flowi_oif == sel->ifindex || !sel->ifindex); } @@ -72,8 +72,8 @@ __xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) { return addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) && addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) && - !((xfrm_flowi_dport(fl, &fl->uli_u) ^ sel->dport) & sel->dport_mask) && - !((xfrm_flowi_sport(fl, &fl->uli_u) ^ sel->sport) & sel->sport_mask) && + !((xfrm_flowi_dport(fl, &fl->u.ip6.uli) ^ sel->dport) & sel->dport_mask) && + !((xfrm_flowi_sport(fl, &fl->u.ip6.uli) ^ sel->sport) & sel->sport_mask) && (fl->flowi_proto == sel->proto || !sel->proto) && (fl->flowi_oif == sel->ifindex || !sel->ifindex); } -- cgit v1.1 From 22bd5b9b13f2931ac80949f8bfbc40e8cab05be7 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Fri, 11 Mar 2011 19:54:08 -0500 Subject: ipv4: Pass ipv4 flow objects into fib_lookup() paths. To start doing these conversions, we need to add some temporary flow4_* macros which will eventually go away when all the protocol code paths are changed to work on AF specific flowi objects. Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 8 ++++---- net/ipv4/fib_rules.c | 6 +++--- net/ipv4/fib_semantics.c | 2 +- net/ipv4/fib_trie.c | 12 ++++++------ net/ipv4/route.c | 6 +++--- 5 files changed, 17 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 7610528..48125d5 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -158,7 +158,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net, if (local_table) { ret = RTN_UNICAST; rcu_read_lock(); - if (!fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) { + if (!fib_table_lookup(local_table, &fl.u.ip4, &res, FIB_LOOKUP_NOREF)) { if (!dev || dev == res.fi->fib_dev) ret = res.type; } @@ -222,7 +222,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, goto e_inval; net = dev_net(dev); - if (fib_lookup(net, &fl, &res)) + if (fib_lookup(net, &fl.u.ip4, &res)) goto last_resort; if (res.type != RTN_UNICAST) { if (res.type != RTN_LOCAL || !accept_local) @@ -256,7 +256,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, fl.flowi_oif = dev->ifindex; ret = 0; - if (fib_lookup(net, &fl, &res) == 0) { + if (fib_lookup(net, &fl.u.ip4, &res) == 0) { if (res.type == RTN_UNICAST) { *spec_dst = FIB_RES_PREFSRC(res); ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; @@ -813,7 +813,7 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) frn->tb_id = tb->tb_id; rcu_read_lock(); - frn->err = fib_table_lookup(tb, &fl, &res, FIB_LOOKUP_NOREF); + frn->err = fib_table_lookup(tb, &fl.u.ip4, &res, FIB_LOOKUP_NOREF); if (!frn->err) { frn->prefixlen = res.prefixlen; diff 
--git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 3018efb..0c63c4a 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -53,7 +53,7 @@ u32 fib_rules_tclass(const struct fib_result *res) } #endif -int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res) +int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) { struct fib_lookup_arg arg = { .result = res, @@ -61,7 +61,7 @@ int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res) }; int err; - err = fib_rules_lookup(net->ipv4.rules_ops, flp, 0, &arg); + err = fib_rules_lookup(net->ipv4.rules_ops, flowi4_to_flowi(flp), 0, &arg); res->r = arg.rule; return err; @@ -95,7 +95,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, if (!tbl) goto errout; - err = fib_table_lookup(tbl, flp, (struct fib_result *) arg->result, arg->flags); + err = fib_table_lookup(tbl, &flp->u.ip4, (struct fib_result *) arg->result, arg->flags); if (err > 0) err = -EAGAIN; errout: diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 79179ad..a721013 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -569,7 +569,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, /* It is not necessary, but requires a bit of thinking */ if (fl.fl4_scope < RT_SCOPE_LINK) fl.fl4_scope = RT_SCOPE_LINK; - err = fib_lookup(net, &fl, &res); + err = fib_lookup(net, &fl.u.ip4, &res); if (err) { rcu_read_unlock(); return err; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index d5ff80e..3d28a35 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1341,7 +1341,7 @@ err: /* should be called with rcu_read_lock */ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, - t_key key, const struct flowi *flp, + t_key key, const struct flowi4 *flp, struct fib_result *res, int fib_flags) { struct leaf_info *li; @@ -1360,9 +1360,9 @@ static int check_leaf(struct fib_table *tb, struct trie 
*t, struct leaf *l, struct fib_info *fi = fa->fa_info; int nhsel, err; - if (fa->fa_tos && fa->fa_tos != flp->fl4_tos) + if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) continue; - if (fa->fa_scope < flp->fl4_scope) + if (fa->fa_scope < flp->flowi4_scope) continue; fib_alias_accessed(fa); err = fib_props[fa->fa_type].error; @@ -1379,7 +1379,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, if (nh->nh_flags & RTNH_F_DEAD) continue; - if (flp->flowi_oif && flp->flowi_oif != nh->nh_oif) + if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) continue; #ifdef CONFIG_IP_FIB_TRIE_STATS @@ -1406,7 +1406,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l, return 1; } -int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, +int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, struct fib_result *res, int fib_flags) { struct trie *t = (struct trie *) tb->tb_data; @@ -1414,7 +1414,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, struct rt_trie_node *n; struct tnode *pn; unsigned int pos, bits; - t_key key = ntohl(flp->fl4_dst); + t_key key = ntohl(flp->daddr); unsigned int chopped_off; t_key cindex = 0; unsigned int current_prefix_length = KEYLENGTH; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c9aa4f9..027b4cc 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1707,7 +1707,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) }; rcu_read_lock(); - if (fib_lookup(dev_net(rt->dst.dev), &fl, &res) == 0) + if (fib_lookup(dev_net(rt->dst.dev), &fl.u.ip4, &res) == 0) src = FIB_RES_PREFSRC(res); else src = inet_select_addr(rt->dst.dev, rt->rt_gateway, @@ -2125,7 +2125,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl.fl4_src = saddr; fl.fl4_tos = tos; fl.fl4_scope = RT_SCOPE_UNIVERSE; - err = fib_lookup(net, &fl, &res); + err = fib_lookup(net, &fl.u.ip4, &res); if (err != 0) { if (!IN_DEV_FORWARD(in_dev)) goto 
e_hostunreach; @@ -2551,7 +2551,7 @@ static struct rtable *ip_route_output_slow(struct net *net, goto make_route; } - if (fib_lookup(net, &fl, &res)) { + if (fib_lookup(net, &fl.u.ip4, &res)) { res.fi = NULL; if (oldflp->flowi_oif) { /* Apparently, routing tables are wrong. Assume, -- cgit v1.1 From 68a5e3dd0a0056d8b349f9eea3756adda53ec17a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 11 Mar 2011 20:07:33 -0500 Subject: ipv4: Use struct flowi4 internally in routing lookups. We will change the externally visible APIs next. Signed-off-by: David S. Miller --- net/ipv4/route.c | 230 +++++++++++++++++++++++++++---------------------------- 1 file changed, 115 insertions(+), 115 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 027b4cc..9e938f9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -109,8 +109,8 @@ #include <linux/sysctl.h> #endif -#define RT_FL_TOS(oldflp) \ - ((u32)(oldflp->fl4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) +#define RT_FL_TOS(oldflp4) \ + ((u32)(oldflp4->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))) #define IP_MAX_MTU 0xFFF0 @@ -1697,17 +1697,17 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) if (rt_is_output_route(rt)) src = rt->rt_src; else { - struct flowi fl = { - .fl4_dst = rt->rt_key_dst, - .fl4_src = rt->rt_key_src, - .fl4_tos = rt->rt_tos, - .flowi_oif = rt->rt_oif, - .flowi_iif = rt->rt_iif, - .flowi_mark = rt->rt_mark, + struct flowi4 fl4 = { + .daddr = rt->rt_key_dst, + .saddr = rt->rt_key_src, + .flowi4_tos = rt->rt_tos, + .flowi4_oif = rt->rt_oif, + .flowi4_iif = rt->rt_iif, + .flowi4_mark = rt->rt_mark, }; rcu_read_lock(); - if (fib_lookup(dev_net(rt->dst.dev), &fl.u.ip4, &res) == 0) + if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) src = FIB_RES_PREFSRC(res); else src = inet_select_addr(rt->dst.dev, rt->rt_gateway, @@ -1757,7 +1757,7 @@ static unsigned int ipv4_default_mtu(const struct dst_entry *dst) return mtu; } -static void rt_init_metrics(struct rtable *rt, const struct flowi
*oldflp, +static void rt_init_metrics(struct rtable *rt, const struct flowi4 *oldflp4, struct fib_info *fi) { struct inet_peer *peer; @@ -1766,7 +1766,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi *oldflp, /* If a peer entry exists for this destination, we must hook * it up in order to get at cached metrics. */ - if (oldflp && (oldflp->flowi_flags & FLOWI_FLAG_PRECOW_METRICS)) + if (oldflp4 && (oldflp4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS)) create = 1; rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create); @@ -1793,7 +1793,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi *oldflp, } } -static void rt_set_nexthop(struct rtable *rt, const struct flowi *oldflp, +static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *oldflp4, const struct fib_result *res, struct fib_info *fi, u16 type, u32 itag) { @@ -1803,7 +1803,7 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi *oldflp, if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = FIB_RES_GW(*res); - rt_init_metrics(rt, oldflp, fi); + rt_init_metrics(rt, oldflp4, fi); #ifdef CONFIG_IP_ROUTE_CLASSID dst->tclassid = FIB_RES_NH(*res).nh_tclassid; #endif @@ -2038,7 +2038,7 @@ static int __mkroute_input(struct sk_buff *skb, static int ip_mkroute_input(struct sk_buff *skb, struct fib_result *res, - const struct flowi *fl, + const struct flowi4 *fl4, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos) { @@ -2057,9 +2057,9 @@ static int ip_mkroute_input(struct sk_buff *skb, return err; /* put it into the cache */ - hash = rt_hash(daddr, saddr, fl->flowi_iif, + hash = rt_hash(daddr, saddr, fl4->flowi4_iif, rt_genid(dev_net(rth->dst.dev))); - rth = rt_intern_hash(hash, rth, skb, fl->flowi_iif); + rth = rt_intern_hash(hash, rth, skb, fl4->flowi4_iif); if (IS_ERR(rth)) return PTR_ERR(rth); return 0; @@ -2081,7 +2081,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, { struct 
fib_result res; struct in_device *in_dev = __in_dev_get_rcu(dev); - struct flowi fl; + struct flowi4 fl4; unsigned flags = 0; u32 itag = 0; struct rtable * rth; @@ -2118,14 +2118,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, /* * Now we are ready to route packet. */ - fl.flowi_oif = 0; - fl.flowi_iif = dev->ifindex; - fl.flowi_mark = skb->mark; - fl.fl4_dst = daddr; - fl.fl4_src = saddr; - fl.fl4_tos = tos; - fl.fl4_scope = RT_SCOPE_UNIVERSE; - err = fib_lookup(net, &fl.u.ip4, &res); + fl4.flowi4_oif = 0; + fl4.flowi4_iif = dev->ifindex; + fl4.flowi4_mark = skb->mark; + fl4.flowi4_tos = tos; + fl4.flowi4_scope = RT_SCOPE_UNIVERSE; + fl4.daddr = daddr; + fl4.saddr = saddr; + err = fib_lookup(net, &fl4, &res); if (err != 0) { if (!IN_DEV_FORWARD(in_dev)) goto e_hostunreach; @@ -2154,7 +2154,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (res.type != RTN_UNICAST) goto martian_destination; - err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); + err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos); out: return err; brd_input: @@ -2205,8 +2205,8 @@ local_input: rth->rt_flags &= ~RTCF_LOCAL; } rth->rt_type = res.type; - hash = rt_hash(daddr, saddr, fl.flowi_iif, rt_genid(net)); - rth = rt_intern_hash(hash, rth, skb, fl.flowi_iif); + hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net)); + rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif); err = 0; if (IS_ERR(rth)) err = PTR_ERR(rth); @@ -2335,25 +2335,25 @@ EXPORT_SYMBOL(ip_route_input_common); /* called with rcu_read_lock() */ static struct rtable *__mkroute_output(const struct fib_result *res, - const struct flowi *fl, - const struct flowi *oldflp, + const struct flowi4 *fl4, + const struct flowi4 *oldflp4, struct net_device *dev_out, unsigned int flags) { struct fib_info *fi = res->fi; - u32 tos = RT_FL_TOS(oldflp); + u32 tos = RT_FL_TOS(oldflp4); struct in_device *in_dev; u16 type = res->type; struct 
rtable *rth; - if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK)) + if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK)) return ERR_PTR(-EINVAL); - if (ipv4_is_lbcast(fl->fl4_dst)) + if (ipv4_is_lbcast(fl4->daddr)) type = RTN_BROADCAST; - else if (ipv4_is_multicast(fl->fl4_dst)) + else if (ipv4_is_multicast(fl4->daddr)) type = RTN_MULTICAST; - else if (ipv4_is_zeronet(fl->fl4_dst)) + else if (ipv4_is_zeronet(fl4->daddr)) return ERR_PTR(-EINVAL); if (dev_out->flags & IFF_LOOPBACK) @@ -2368,8 +2368,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res, fi = NULL; } else if (type == RTN_MULTICAST) { flags |= RTCF_MULTICAST | RTCF_LOCAL; - if (!ip_check_mc_rcu(in_dev, oldflp->fl4_dst, oldflp->fl4_src, - oldflp->flowi_proto)) + if (!ip_check_mc_rcu(in_dev, oldflp4->daddr, oldflp4->saddr, + oldflp4->flowi4_proto)) flags &= ~RTCF_LOCAL; /* If multicast route do not exist use * default one, but do not gateway in this case. @@ -2384,20 +2384,20 @@ static struct rtable *__mkroute_output(const struct fib_result *res, if (!rth) return ERR_PTR(-ENOBUFS); - rth->rt_key_dst = oldflp->fl4_dst; + rth->rt_key_dst = oldflp4->daddr; rth->rt_tos = tos; - rth->rt_key_src = oldflp->fl4_src; - rth->rt_oif = oldflp->flowi_oif; - rth->rt_mark = oldflp->flowi_mark; - rth->rt_dst = fl->fl4_dst; - rth->rt_src = fl->fl4_src; + rth->rt_key_src = oldflp4->saddr; + rth->rt_oif = oldflp4->flowi4_oif; + rth->rt_mark = oldflp4->flowi4_mark; + rth->rt_dst = fl4->daddr; + rth->rt_src = fl4->saddr; rth->rt_iif = 0; /* get references to the devices that are to be hold by the routing cache entry */ rth->dst.dev = dev_out; dev_hold(dev_out); - rth->rt_gateway = fl->fl4_dst; - rth->rt_spec_dst= fl->fl4_src; + rth->rt_gateway = fl4->daddr; + rth->rt_spec_dst= fl4->saddr; rth->dst.output=ip_output; rth->rt_genid = rt_genid(dev_net(dev_out)); @@ -2406,10 +2406,10 @@ static struct rtable *__mkroute_output(const struct fib_result *res, if (flags & 
RTCF_LOCAL) { rth->dst.input = ip_local_deliver; - rth->rt_spec_dst = fl->fl4_dst; + rth->rt_spec_dst = fl4->daddr; } if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { - rth->rt_spec_dst = fl->fl4_src; + rth->rt_spec_dst = fl4->saddr; if (flags & RTCF_LOCAL && !(dev_out->flags & IFF_LOOPBACK)) { rth->dst.output = ip_mc_output; @@ -2418,7 +2418,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, #ifdef CONFIG_IP_MROUTE if (type == RTN_MULTICAST) { if (IN_DEV_MFORWARD(in_dev) && - !ipv4_is_local_multicast(oldflp->fl4_dst)) { + !ipv4_is_local_multicast(oldflp4->daddr)) { rth->dst.input = ip_mr_input; rth->dst.output = ip_mc_output; } @@ -2426,7 +2426,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, #endif } - rt_set_nexthop(rth, oldflp, res, fi, type, 0); + rt_set_nexthop(rth, oldflp4, res, fi, type, 0); rth->rt_flags = flags; return rth; @@ -2438,10 +2438,10 @@ static struct rtable *__mkroute_output(const struct fib_result *res, */ static struct rtable *ip_route_output_slow(struct net *net, - const struct flowi *oldflp) + const struct flowi4 *oldflp4) { - u32 tos = RT_FL_TOS(oldflp); - struct flowi fl; + u32 tos = RT_FL_TOS(oldflp4); + struct flowi4 fl4; struct fib_result res; unsigned int flags = 0; struct net_device *dev_out = NULL; @@ -2452,21 +2452,21 @@ static struct rtable *ip_route_output_slow(struct net *net, res.r = NULL; #endif - fl.flowi_oif = oldflp->flowi_oif; - fl.flowi_iif = net->loopback_dev->ifindex; - fl.flowi_mark = oldflp->flowi_mark; - fl.fl4_dst = oldflp->fl4_dst; - fl.fl4_src = oldflp->fl4_src; - fl.fl4_tos = tos & IPTOS_RT_MASK; - fl.fl4_scope = ((tos & RTO_ONLINK) ? + fl4.flowi4_oif = oldflp4->flowi4_oif; + fl4.flowi4_iif = net->loopback_dev->ifindex; + fl4.flowi4_mark = oldflp4->flowi4_mark; + fl4.daddr = oldflp4->daddr; + fl4.saddr = oldflp4->saddr; + fl4.flowi4_tos = tos & IPTOS_RT_MASK; + fl4.flowi4_scope = ((tos & RTO_ONLINK) ? 
RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); rcu_read_lock(); - if (oldflp->fl4_src) { + if (oldflp4->saddr) { rth = ERR_PTR(-EINVAL); - if (ipv4_is_multicast(oldflp->fl4_src) || - ipv4_is_lbcast(oldflp->fl4_src) || - ipv4_is_zeronet(oldflp->fl4_src)) + if (ipv4_is_multicast(oldflp4->saddr) || + ipv4_is_lbcast(oldflp4->saddr) || + ipv4_is_zeronet(oldflp4->saddr)) goto out; /* I removed check for oif == dev_out->oif here. @@ -2477,11 +2477,11 @@ static struct rtable *ip_route_output_slow(struct net *net, of another iface. --ANK */ - if (oldflp->flowi_oif == 0 && - (ipv4_is_multicast(oldflp->fl4_dst) || - ipv4_is_lbcast(oldflp->fl4_dst))) { + if (oldflp4->flowi4_oif == 0 && + (ipv4_is_multicast(oldflp4->daddr) || + ipv4_is_lbcast(oldflp4->daddr))) { /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ - dev_out = __ip_dev_find(net, oldflp->fl4_src, false); + dev_out = __ip_dev_find(net, oldflp4->saddr, false); if (dev_out == NULL) goto out; @@ -2500,20 +2500,20 @@ static struct rtable *ip_route_output_slow(struct net *net, Luckily, this hack is good workaround. 
*/ - fl.flowi_oif = dev_out->ifindex; + fl4.flowi4_oif = dev_out->ifindex; goto make_route; } - if (!(oldflp->flowi_flags & FLOWI_FLAG_ANYSRC)) { + if (!(oldflp4->flowi4_flags & FLOWI_FLAG_ANYSRC)) { /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ - if (!__ip_dev_find(net, oldflp->fl4_src, false)) + if (!__ip_dev_find(net, oldflp4->saddr, false)) goto out; } } - if (oldflp->flowi_oif) { - dev_out = dev_get_by_index_rcu(net, oldflp->flowi_oif); + if (oldflp4->flowi4_oif) { + dev_out = dev_get_by_index_rcu(net, oldflp4->flowi4_oif); rth = ERR_PTR(-ENODEV); if (dev_out == NULL) goto out; @@ -2523,37 +2523,37 @@ static struct rtable *ip_route_output_slow(struct net *net, rth = ERR_PTR(-ENETUNREACH); goto out; } - if (ipv4_is_local_multicast(oldflp->fl4_dst) || - ipv4_is_lbcast(oldflp->fl4_dst)) { - if (!fl.fl4_src) - fl.fl4_src = inet_select_addr(dev_out, 0, - RT_SCOPE_LINK); + if (ipv4_is_local_multicast(oldflp4->daddr) || + ipv4_is_lbcast(oldflp4->daddr)) { + if (!fl4.saddr) + fl4.saddr = inet_select_addr(dev_out, 0, + RT_SCOPE_LINK); goto make_route; } - if (!fl.fl4_src) { - if (ipv4_is_multicast(oldflp->fl4_dst)) - fl.fl4_src = inet_select_addr(dev_out, 0, - fl.fl4_scope); - else if (!oldflp->fl4_dst) - fl.fl4_src = inet_select_addr(dev_out, 0, - RT_SCOPE_HOST); + if (!fl4.saddr) { + if (ipv4_is_multicast(oldflp4->daddr)) + fl4.saddr = inet_select_addr(dev_out, 0, + fl4.flowi4_scope); + else if (!oldflp4->daddr) + fl4.saddr = inet_select_addr(dev_out, 0, + RT_SCOPE_HOST); } } - if (!fl.fl4_dst) { - fl.fl4_dst = fl.fl4_src; - if (!fl.fl4_dst) - fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); + if (!fl4.daddr) { + fl4.daddr = fl4.saddr; + if (!fl4.daddr) + fl4.daddr = fl4.saddr = htonl(INADDR_LOOPBACK); dev_out = net->loopback_dev; - fl.flowi_oif = net->loopback_dev->ifindex; + fl4.flowi4_oif = net->loopback_dev->ifindex; res.type = RTN_LOCAL; flags |= RTCF_LOCAL; goto make_route; } - if (fib_lookup(net, &fl.u.ip4, &res)) { + if (fib_lookup(net, &fl4, 
&res)) { res.fi = NULL; - if (oldflp->flowi_oif) { + if (oldflp4->flowi4_oif) { /* Apparently, routing tables are wrong. Assume, that the destination is on link. @@ -2572,9 +2572,9 @@ static struct rtable *ip_route_output_slow(struct net *net, likely IPv6, but we do not. */ - if (fl.fl4_src == 0) - fl.fl4_src = inet_select_addr(dev_out, 0, - RT_SCOPE_LINK); + if (fl4.saddr == 0) + fl4.saddr = inet_select_addr(dev_out, 0, + RT_SCOPE_LINK); res.type = RTN_UNICAST; goto make_route; } @@ -2583,42 +2583,42 @@ static struct rtable *ip_route_output_slow(struct net *net, } if (res.type == RTN_LOCAL) { - if (!fl.fl4_src) { + if (!fl4.saddr) { if (res.fi->fib_prefsrc) - fl.fl4_src = res.fi->fib_prefsrc; + fl4.saddr = res.fi->fib_prefsrc; else - fl.fl4_src = fl.fl4_dst; + fl4.saddr = fl4.daddr; } dev_out = net->loopback_dev; - fl.flowi_oif = dev_out->ifindex; + fl4.flowi4_oif = dev_out->ifindex; res.fi = NULL; flags |= RTCF_LOCAL; goto make_route; } #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (res.fi->fib_nhs > 1 && fl.flowi_oif == 0) + if (res.fi->fib_nhs > 1 && fl4.flowi4_oif == 0) fib_select_multipath(&res); else #endif - if (!res.prefixlen && res.type == RTN_UNICAST && !fl.flowi_oif) + if (!res.prefixlen && res.type == RTN_UNICAST && !fl4.flowi4_oif) fib_select_default(&res); - if (!fl.fl4_src) - fl.fl4_src = FIB_RES_PREFSRC(res); + if (!fl4.saddr) + fl4.saddr = FIB_RES_PREFSRC(res); dev_out = FIB_RES_DEV(res); - fl.flowi_oif = dev_out->ifindex; + fl4.flowi4_oif = dev_out->ifindex; make_route: - rth = __mkroute_output(&res, &fl, oldflp, dev_out, flags); + rth = __mkroute_output(&res, &fl4, oldflp4, dev_out, flags); if (!IS_ERR(rth)) { unsigned int hash; - hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->flowi_oif, + hash = rt_hash(oldflp4->daddr, oldflp4->saddr, oldflp4->flowi4_oif, rt_genid(dev_net(dev_out))); - rth = rt_intern_hash(hash, rth, NULL, oldflp->flowi_oif); + rth = rt_intern_hash(hash, rth, NULL, oldflp4->flowi4_oif); } out: @@ -2658,7 +2658,7 @@ struct 
rtable *__ip_route_output_key(struct net *net, const struct flowi *flp) rcu_read_unlock_bh(); slow_output: - return ip_route_output_slow(net, flp); + return ip_route_output_slow(net, &flp->u.ip4); } EXPORT_SYMBOL_GPL(__ip_route_output_key); @@ -2913,14 +2913,14 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (err == 0 && rt->dst.error) err = -rt->dst.error; } else { - struct flowi fl = { - .fl4_dst = dst, - .fl4_src = src, - .fl4_tos = rtm->rtm_tos, - .flowi_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, - .flowi_mark = mark, + struct flowi4 fl4 = { + .daddr = dst, + .saddr = src, + .flowi4_tos = rtm->rtm_tos, + .flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, + .flowi4_mark = mark, }; - rt = ip_route_output_key(net, &fl); + rt = ip_route_output_key(net, flowi4_to_flowi(&fl4)); err = 0; if (IS_ERR(rt)) -- cgit v1.1 From 9d6ec938019c6b16cb9ec96598ebe8f20de435fe Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 12 Mar 2011 01:12:47 -0500 Subject: ipv4: Use flowi4 in public route lookup interfaces. Signed-off-by: David S. 
Miller --- net/dccp/ipv4.c | 20 +++++++------- net/ipv4/icmp.c | 59 ++++++++++++++++++++++------------------- net/ipv4/inet_connection_sock.c | 26 +++++++++--------- net/ipv4/ip_output.c | 22 +++++++-------- net/ipv4/netfilter.c | 26 +++++++++--------- net/ipv4/raw.c | 32 +++++++++++----------- net/ipv4/route.c | 36 +++++++++++++------------ net/ipv4/syncookies.c | 24 ++++++++--------- net/ipv4/udp.c | 26 +++++++++--------- net/ipv4/xfrm4_policy.c | 10 +++---- net/netfilter/ipvs/ip_vs_xmit.c | 12 ++++----- net/netfilter/xt_TEE.c | 14 +++++----- net/sctp/protocol.c | 30 ++++++++++----------- 13 files changed, 172 insertions(+), 165 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index d934b20..be98470 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -465,18 +465,18 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, struct sk_buff *skb) { struct rtable *rt; - struct flowi fl = { - .flowi_oif = skb_rtable(skb)->rt_iif, - .fl4_dst = ip_hdr(skb)->saddr, - .fl4_src = ip_hdr(skb)->daddr, - .fl4_tos = RT_CONN_FLAGS(sk), - .flowi_proto = sk->sk_protocol, - .fl4_sport = dccp_hdr(skb)->dccph_dport, - .fl4_dport = dccp_hdr(skb)->dccph_sport, + struct flowi4 fl4 = { + .flowi4_oif = skb_rtable(skb)->rt_iif, + .daddr = ip_hdr(skb)->saddr, + .saddr = ip_hdr(skb)->daddr, + .flowi4_tos = RT_CONN_FLAGS(sk), + .flowi4_proto = sk->sk_protocol, + .uli.ports.sport = dccp_hdr(skb)->dccph_dport, + .uli.ports.dport = dccp_hdr(skb)->dccph_sport, }; - security_skb_classify_flow(skb, &fl); - rt = ip_route_output_flow(net, &fl, sk); + security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); + rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) { IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); return NULL; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 8d09195..8eca3c2 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -353,14 +353,14 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) daddr = 
icmp_param->replyopts.faddr; } { - struct flowi fl = { - .fl4_dst = daddr, - .fl4_src = rt->rt_spec_dst, - .fl4_tos = RT_TOS(ip_hdr(skb)->tos), - .flowi_proto = IPPROTO_ICMP, + struct flowi4 fl4 = { + .daddr = daddr, + .saddr = rt->rt_spec_dst, + .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), + .flowi4_proto = IPPROTO_ICMP, }; - security_skb_classify_flow(skb, &fl); - rt = ip_route_output_key(net, &fl); + security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); + rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) goto out_unlock; } @@ -378,30 +378,31 @@ static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in, int type, int code, struct icmp_bxm *param) { - struct flowi fl = { - .fl4_dst = (param->replyopts.srr ? - param->replyopts.faddr : iph->saddr), - .fl4_src = saddr, - .fl4_tos = RT_TOS(tos), - .flowi_proto = IPPROTO_ICMP, - .fl4_icmp_type = type, - .fl4_icmp_code = code, + struct flowi4 fl4 = { + .daddr = (param->replyopts.srr ? + param->replyopts.faddr : iph->saddr), + .saddr = saddr, + .flowi4_tos = RT_TOS(tos), + .flowi4_proto = IPPROTO_ICMP, + .uli.icmpt.type = type, + .uli.icmpt.code = code, }; struct rtable *rt, *rt2; int err; - security_skb_classify_flow(skb_in, &fl); - rt = __ip_route_output_key(net, &fl); + security_skb_classify_flow(skb_in, flowi4_to_flowi(&fl4)); + rt = __ip_route_output_key(net, &fl4); if (IS_ERR(rt)) return rt; /* No need to clone since we're just using its address. 
*/ rt2 = rt; - if (!fl.fl4_src) - fl.fl4_src = rt->rt_src; + if (!fl4.saddr) + fl4.saddr = rt->rt_src; - rt = (struct rtable *) xfrm_lookup(net, &rt->dst, &fl, NULL, 0); + rt = (struct rtable *) xfrm_lookup(net, &rt->dst, + flowi4_to_flowi(&fl4), NULL, 0); if (!IS_ERR(rt)) { if (rt != rt2) return rt; @@ -410,27 +411,27 @@ static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in, } else return rt; - err = xfrm_decode_session_reverse(skb_in, &fl, AF_INET); + err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(&fl4), AF_INET); if (err) goto relookup_failed; - if (inet_addr_type(net, fl.fl4_src) == RTN_LOCAL) { - rt2 = __ip_route_output_key(net, &fl); + if (inet_addr_type(net, fl4.saddr) == RTN_LOCAL) { + rt2 = __ip_route_output_key(net, &fl4); if (IS_ERR(rt2)) err = PTR_ERR(rt2); } else { - struct flowi fl2 = {}; + struct flowi4 fl4_2 = {}; unsigned long orefdst; - fl2.fl4_dst = fl.fl4_src; - rt2 = ip_route_output_key(net, &fl2); + fl4_2.daddr = fl4.saddr; + rt2 = ip_route_output_key(net, &fl4_2); if (IS_ERR(rt2)) { err = PTR_ERR(rt2); goto relookup_failed; } /* Ugh! 
*/ orefdst = skb_in->_skb_refdst; /* save old refdst */ - err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src, + err = ip_route_input(skb_in, fl4.daddr, fl4.saddr, RT_TOS(tos), rt2->dst.dev); dst_release(&rt2->dst); @@ -441,7 +442,9 @@ static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in, if (err) goto relookup_failed; - rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst, &fl, NULL, XFRM_LOOKUP_ICMP); + rt2 = (struct rtable *) xfrm_lookup(net, &rt2->dst, + flowi4_to_flowi(&fl4), NULL, + XFRM_LOOKUP_ICMP); if (!IS_ERR(rt2)) { dst_release(&rt->dst); rt = rt2; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 10a8e95..beecc12 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -356,22 +356,22 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct rtable *rt; const struct inet_request_sock *ireq = inet_rsk(req); struct ip_options *opt = inet_rsk(req)->opt; - struct flowi fl = { - .flowi_oif = sk->sk_bound_dev_if, - .flowi_mark = sk->sk_mark, - .fl4_dst = ((opt && opt->srr) ? - opt->faddr : ireq->rmt_addr), - .fl4_src = ireq->loc_addr, - .fl4_tos = RT_CONN_FLAGS(sk), - .flowi_proto = sk->sk_protocol, - .flowi_flags = inet_sk_flowi_flags(sk), - .fl4_sport = inet_sk(sk)->inet_sport, - .fl4_dport = ireq->rmt_port, + struct flowi4 fl4 = { + .flowi4_oif = sk->sk_bound_dev_if, + .flowi4_mark = sk->sk_mark, + .daddr = ((opt && opt->srr) ? 
+ opt->faddr : ireq->rmt_addr), + .saddr = ireq->loc_addr, + .flowi4_tos = RT_CONN_FLAGS(sk), + .flowi4_proto = sk->sk_protocol, + .flowi4_flags = inet_sk_flowi_flags(sk), + .uli.ports.sport = inet_sk(sk)->inet_sport, + .uli.ports.dport = ireq->rmt_port, }; struct net *net = sock_net(sk); - security_req_classify_flow(req, &fl); - rt = ip_route_output_flow(net, &fl, sk); + security_req_classify_flow(req, flowi4_to_flowi(&fl4)); + rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) goto no_route; if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 67e5f71..2b9cc40 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1474,18 +1474,18 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar } { - struct flowi fl = { - .flowi_oif = arg->bound_dev_if, - .fl4_dst = daddr, - .fl4_src = rt->rt_spec_dst, - .fl4_tos = RT_TOS(ip_hdr(skb)->tos), - .fl4_sport = tcp_hdr(skb)->dest, - .fl4_dport = tcp_hdr(skb)->source, - .flowi_proto = sk->sk_protocol, - .flowi_flags = ip_reply_arg_flowi_flags(arg), + struct flowi4 fl4 = { + .flowi4_oif = arg->bound_dev_if, + .daddr = daddr, + .saddr = rt->rt_spec_dst, + .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), + .uli.ports.sport = tcp_hdr(skb)->dest, + .uli.ports.dport = tcp_hdr(skb)->source, + .flowi4_proto = sk->sk_protocol, + .flowi4_flags = ip_reply_arg_flowi_flags(arg), }; - security_skb_classify_flow(skb, &fl); - rt = ip_route_output_key(sock_net(sk), &fl); + security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); + rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) return; } diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 6f40ba5..f3c0b54 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -16,7 +16,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) struct net *net = dev_net(skb_dst(skb)->dev); const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; - struct flowi 
fl = {}; + struct flowi4 fl4 = {}; unsigned long orefdst; unsigned int hh_len; unsigned int type; @@ -31,14 +31,14 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. */ if (addr_type == RTN_LOCAL) { - fl.fl4_dst = iph->daddr; + fl4.daddr = iph->daddr; if (type == RTN_LOCAL) - fl.fl4_src = iph->saddr; - fl.fl4_tos = RT_TOS(iph->tos); - fl.flowi_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; - fl.flowi_mark = skb->mark; - fl.flowi_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; - rt = ip_route_output_key(net, &fl); + fl4.saddr = iph->saddr; + fl4.flowi4_tos = RT_TOS(iph->tos); + fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; + fl4.flowi4_mark = skb->mark; + fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; + rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) return -1; @@ -48,8 +48,8 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) } else { /* non-local src, find valid iif to satisfy * rp-filter when calling ip_route_input. 
*/ - fl.fl4_dst = iph->saddr; - rt = ip_route_output_key(net, &fl); + fl4.daddr = iph->saddr; + rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) return -1; @@ -68,10 +68,10 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) #ifdef CONFIG_XFRM if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && - xfrm_decode_session(skb, &fl, AF_INET) == 0) { + xfrm_decode_session(skb, flowi4_to_flowi(&fl4), AF_INET) == 0) { struct dst_entry *dst = skb_dst(skb); skb_dst_set(skb, NULL); - dst = xfrm_lookup(net, dst, &fl, skb->sk, 0); + dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0); if (IS_ERR(dst)) return -1; skb_dst_set(skb, dst); @@ -223,7 +223,7 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, static int nf_ip_route(struct dst_entry **dst, struct flowi *fl) { - struct rtable *rt = ip_route_output_key(&init_net, fl); + struct rtable *rt = ip_route_output_key(&init_net, &fl->u.ip4); if (IS_ERR(rt)) return PTR_ERR(rt); *dst = &rt->dst; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 333b826..452e178 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -402,7 +402,7 @@ error: return err; } -static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) +static int raw_probe_proto_opt(struct flowi4 *fl4, struct msghdr *msg) { struct iovec *iov; u8 __user *type = NULL; @@ -418,7 +418,7 @@ static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) if (!iov) continue; - switch (fl->flowi_proto) { + switch (fl4->flowi4_proto) { case IPPROTO_ICMP: /* check if one-byte field is readable or not. 
*/ if (iov->iov_base && iov->iov_len < 1) @@ -433,8 +433,8 @@ static int raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) code = iov->iov_base; if (type && code) { - if (get_user(fl->fl4_icmp_type, type) || - get_user(fl->fl4_icmp_code, code)) + if (get_user(fl4->uli.icmpt.type, type) || + get_user(fl4->uli.icmpt.code, code)) return -EFAULT; probed = 1; } @@ -548,23 +548,25 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, } { - struct flowi fl = { .flowi_oif = ipc.oif, - .flowi_mark = sk->sk_mark, - .fl4_dst = daddr, - .fl4_src = saddr, - .fl4_tos = tos, - .flowi_proto = inet->hdrincl ? IPPROTO_RAW : - sk->sk_protocol, - .flowi_flags = FLOWI_FLAG_CAN_SLEEP, + struct flowi4 fl4 = { + .flowi4_oif = ipc.oif, + .flowi4_mark = sk->sk_mark, + .daddr = daddr, + .saddr = saddr, + .flowi4_tos = tos, + .flowi4_proto = (inet->hdrincl ? + IPPROTO_RAW : + sk->sk_protocol), + .flowi4_flags = FLOWI_FLAG_CAN_SLEEP, }; if (!inet->hdrincl) { - err = raw_probe_proto_opt(&fl, msg); + err = raw_probe_proto_opt(&fl4, msg); if (err) goto done; } - security_sk_classify_flow(sk, &fl); - rt = ip_route_output_flow(sock_net(sk), &fl, sk); + security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); + rt = ip_route_output_flow(sock_net(sk), &fl4, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); goto done; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 9e938f9..5655095 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2626,7 +2626,7 @@ out: return rth; } -struct rtable *__ip_route_output_key(struct net *net, const struct flowi *flp) +struct rtable *__ip_route_output_key(struct net *net, const struct flowi4 *flp4) { struct rtable *rth; unsigned int hash; @@ -2634,17 +2634,17 @@ struct rtable *__ip_route_output_key(struct net *net, const struct flowi *flp) if (!rt_caching(net)) goto slow_output; - hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->flowi_oif, rt_genid(net)); + hash = rt_hash(flp4->daddr, flp4->saddr, flp4->flowi4_oif, rt_genid(net)); 
rcu_read_lock_bh(); for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; rth = rcu_dereference_bh(rth->dst.rt_next)) { - if (rth->rt_key_dst == flp->fl4_dst && - rth->rt_key_src == flp->fl4_src && + if (rth->rt_key_dst == flp4->daddr && + rth->rt_key_src == flp4->saddr && rt_is_output_route(rth) && - rth->rt_oif == flp->flowi_oif && - rth->rt_mark == flp->flowi_mark && - !((rth->rt_tos ^ flp->fl4_tos) & + rth->rt_oif == flp4->flowi4_oif && + rth->rt_mark == flp4->flowi4_mark && + !((rth->rt_tos ^ flp4->flowi4_tos) & (IPTOS_RT_MASK | RTO_ONLINK)) && net_eq(dev_net(rth->dst.dev), net) && !rt_is_expired(rth)) { @@ -2658,7 +2658,7 @@ struct rtable *__ip_route_output_key(struct net *net, const struct flowi *flp) rcu_read_unlock_bh(); slow_output: - return ip_route_output_slow(net, &flp->u.ip4); + return ip_route_output_slow(net, flp4); } EXPORT_SYMBOL_GPL(__ip_route_output_key); @@ -2733,20 +2733,22 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or return rt ? &rt->dst : ERR_PTR(-ENOMEM); } -struct rtable *ip_route_output_flow(struct net *net, struct flowi *flp, +struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, struct sock *sk) { - struct rtable *rt = __ip_route_output_key(net, flp); + struct rtable *rt = __ip_route_output_key(net, flp4); if (IS_ERR(rt)) return rt; - if (flp->flowi_proto) { - if (!flp->fl4_src) - flp->fl4_src = rt->rt_src; - if (!flp->fl4_dst) - flp->fl4_dst = rt->rt_dst; - rt = (struct rtable *) xfrm_lookup(net, &rt->dst, flp, sk, 0); + if (flp4->flowi4_proto) { + if (!flp4->saddr) + flp4->saddr = rt->rt_src; + if (!flp4->daddr) + flp4->daddr = rt->rt_dst; + rt = (struct rtable *) xfrm_lookup(net, &rt->dst, + flowi4_to_flowi(flp4), + sk, 0); } return rt; @@ -2920,7 +2922,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void .flowi4_oif = tb[RTA_OIF] ? 
nla_get_u32(tb[RTA_OIF]) : 0, .flowi4_mark = mark, }; - rt = ip_route_output_key(net, flowi4_to_flowi(&fl4)); + rt = ip_route_output_key(net, &fl4); err = 0; if (IS_ERR(rt)) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index d90529d..e3b5b75 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -345,19 +345,19 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, * no easy way to do this. */ { - struct flowi fl = { - .flowi_mark = sk->sk_mark, - .fl4_dst = ((opt && opt->srr) ? - opt->faddr : ireq->rmt_addr), - .fl4_src = ireq->loc_addr, - .fl4_tos = RT_CONN_FLAGS(sk), - .flowi_proto = IPPROTO_TCP, - .flowi_flags = inet_sk_flowi_flags(sk), - .fl4_sport = th->dest, - .fl4_dport = th->source, + struct flowi4 fl4 = { + .flowi4_mark = sk->sk_mark, + .daddr = ((opt && opt->srr) ? + opt->faddr : ireq->rmt_addr), + .saddr = ireq->loc_addr, + .flowi4_tos = RT_CONN_FLAGS(sk), + .flowi4_proto = IPPROTO_TCP, + .flowi4_flags = inet_sk_flowi_flags(sk), + .uli.ports.sport = th->dest, + .uli.ports.dport = th->source, }; - security_req_classify_flow(req, &fl); - rt = ip_route_output_key(sock_net(sk), &fl); + security_req_classify_flow(req, flowi4_to_flowi(&fl4)); + rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { reqsk_free(req); goto out; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 116e4a8..25c0807 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -908,22 +908,22 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, rt = (struct rtable *)sk_dst_check(sk, 0); if (rt == NULL) { - struct flowi fl = { - .flowi_oif = ipc.oif, - .flowi_mark = sk->sk_mark, - .fl4_dst = faddr, - .fl4_src = saddr, - .fl4_tos = tos, - .flowi_proto = sk->sk_protocol, - .flowi_flags = (inet_sk_flowi_flags(sk) | - FLOWI_FLAG_CAN_SLEEP), - .fl4_sport = inet->inet_sport, - .fl4_dport = dport, + struct flowi4 fl4 = { + .flowi4_oif = ipc.oif, + .flowi4_mark = sk->sk_mark, + .daddr = faddr, + .saddr = saddr, + .flowi4_tos = tos, 
+ .flowi4_proto = sk->sk_protocol, + .flowi4_flags = (inet_sk_flowi_flags(sk) | + FLOWI_FLAG_CAN_SLEEP), + .uli.ports.sport = inet->inet_sport, + .uli.ports.dport = dport, }; struct net *net = sock_net(sk); - security_sk_classify_flow(sk, &fl); - rt = ip_route_output_flow(net, &fl, sk); + security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); + rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); rt = NULL; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index b7b0921..b111f468 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -22,16 +22,16 @@ static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, const xfrm_address_t *saddr, const xfrm_address_t *daddr) { - struct flowi fl = { - .fl4_dst = daddr->a4, - .fl4_tos = tos, + struct flowi4 fl4 = { + .daddr = daddr->a4, + .flowi4_tos = tos, }; struct rtable *rt; if (saddr) - fl.fl4_src = saddr->a4; + fl4.saddr = saddr->a4; - rt = __ip_route_output_key(net, &fl); + rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) return &rt->dst; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index cc8071f..7dc00e3 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -165,14 +165,14 @@ __ip_vs_reroute_locally(struct sk_buff *skb) return 0; refdst_drop(orefdst); } else { - struct flowi fl = { - .fl4_dst = iph->daddr, - .fl4_src = iph->saddr, - .fl4_tos = RT_TOS(iph->tos), - .flowi_mark = skb->mark, + struct flowi4 fl4 = { + .daddr = iph->daddr, + .saddr = iph->saddr, + .flowi4_tos = RT_TOS(iph->tos), + .flowi4_mark = skb->mark, }; - rt = ip_route_output_key(net, &fl); + rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) return 0; if (!(rt->rt_flags & RTCF_LOCAL)) { diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index cb14ae2..d8c00f9 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -62,18 +62,18 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo 
*info) const struct iphdr *iph = ip_hdr(skb); struct net *net = pick_net(skb); struct rtable *rt; - struct flowi fl; + struct flowi4 fl4; - memset(&fl, 0, sizeof(fl)); + memset(&fl4, 0, sizeof(fl4)); if (info->priv) { if (info->priv->oif == -1) return false; - fl.flowi_oif = info->priv->oif; + fl4.flowi4_oif = info->priv->oif; } - fl.fl4_dst = info->gw.ip; - fl.fl4_tos = RT_TOS(iph->tos); - fl.fl4_scope = RT_SCOPE_UNIVERSE; - rt = ip_route_output_key(net, &fl); + fl4.daddr = info->gw.ip; + fl4.flowi4_tos = RT_TOS(iph->tos); + fl4.flowi4_scope = RT_SCOPE_UNIVERSE; + rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) return false; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index b6fa294..31c0456 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -468,30 +468,30 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, union sctp_addr *saddr) { struct rtable *rt; - struct flowi fl; + struct flowi4 fl4; struct sctp_bind_addr *bp; struct sctp_sockaddr_entry *laddr; struct dst_entry *dst = NULL; union sctp_addr dst_saddr; - memset(&fl, 0x0, sizeof(struct flowi)); - fl.fl4_dst = daddr->v4.sin_addr.s_addr; - fl.fl4_dport = daddr->v4.sin_port; - fl.flowi_proto = IPPROTO_SCTP; + memset(&fl4, 0x0, sizeof(struct flowi4)); + fl4.daddr = daddr->v4.sin_addr.s_addr; + fl4.uli.ports.dport = daddr->v4.sin_port; + fl4.flowi4_proto = IPPROTO_SCTP; if (asoc) { - fl.fl4_tos = RT_CONN_FLAGS(asoc->base.sk); - fl.flowi_oif = asoc->base.sk->sk_bound_dev_if; - fl.fl4_sport = htons(asoc->base.bind_addr.port); + fl4.flowi4_tos = RT_CONN_FLAGS(asoc->base.sk); + fl4.flowi4_oif = asoc->base.sk->sk_bound_dev_if; + fl4.uli.ports.sport = htons(asoc->base.bind_addr.port); } if (saddr) { - fl.fl4_src = saddr->v4.sin_addr.s_addr; - fl.fl4_sport = saddr->v4.sin_port; + fl4.saddr = saddr->v4.sin_addr.s_addr; + fl4.uli.ports.sport = saddr->v4.sin_port; } SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ", - __func__, &fl.fl4_dst, &fl.fl4_src); + __func__, &fl4.daddr, 
&fl4.saddr); - rt = ip_route_output_key(&init_net, &fl); + rt = ip_route_output_key(&init_net, &fl4); if (!IS_ERR(rt)) dst = &rt->dst; @@ -533,9 +533,9 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, continue; if ((laddr->state == SCTP_ADDR_SRC) && (AF_INET == laddr->a.sa.sa_family)) { - fl.fl4_src = laddr->a.v4.sin_addr.s_addr; - fl.fl4_sport = laddr->a.v4.sin_port; - rt = ip_route_output_key(&init_net, &fl); + fl4.saddr = laddr->a.v4.sin_addr.s_addr; + fl4.uli.ports.sport = laddr->a.v4.sin_port; + rt = ip_route_output_key(&init_net, &fl4); if (!IS_ERR(rt)) { dst = &rt->dst; goto out_unlock; -- cgit v1.1 From 9ade22861f922344788321e374c542c92bc049b6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 12 Mar 2011 02:02:42 -0500 Subject: ipv4: Use flowi4 in FIB layer. Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 40 ++++++++++++++++++++-------------------- net/ipv4/fib_rules.c | 7 ++++--- net/ipv4/fib_semantics.c | 14 +++++++------- 3 files changed, 31 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 48125d5..a373a25 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -140,7 +140,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net, const struct net_device *dev, __be32 addr) { - struct flowi fl = { .fl4_dst = addr }; + struct flowi4 fl4 = { .daddr = addr }; struct fib_result res; unsigned ret = RTN_BROADCAST; struct fib_table *local_table; @@ -158,7 +158,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net, if (local_table) { ret = RTN_UNICAST; rcu_read_lock(); - if (!fib_table_lookup(local_table, &fl.u.ip4, &res, FIB_LOOKUP_NOREF)) { + if (!fib_table_lookup(local_table, &fl4, &res, FIB_LOOKUP_NOREF)) { if (!dev || dev == res.fi->fib_dev) ret = res.type; } @@ -193,20 +193,20 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, u32 *itag, u32 mark) { struct in_device *in_dev; - struct flowi 
fl; + struct flowi4 fl4; struct fib_result res; int no_addr, rpf, accept_local; bool dev_match; int ret; struct net *net; - fl.flowi_oif = 0; - fl.flowi_iif = oif; - fl.flowi_mark = mark; - fl.fl4_dst = src; - fl.fl4_src = dst; - fl.fl4_tos = tos; - fl.fl4_scope = RT_SCOPE_UNIVERSE; + fl4.flowi4_oif = 0; + fl4.flowi4_iif = oif; + fl4.flowi4_mark = mark; + fl4.daddr = src; + fl4.saddr = dst; + fl4.flowi4_tos = tos; + fl4.flowi4_scope = RT_SCOPE_UNIVERSE; no_addr = rpf = accept_local = 0; in_dev = __in_dev_get_rcu(dev); @@ -215,14 +215,14 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, rpf = IN_DEV_RPFILTER(in_dev); accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); if (mark && !IN_DEV_SRC_VMARK(in_dev)) - fl.flowi_mark = 0; + fl4.flowi4_mark = 0; } if (in_dev == NULL) goto e_inval; net = dev_net(dev); - if (fib_lookup(net, &fl.u.ip4, &res)) + if (fib_lookup(net, &fl4, &res)) goto last_resort; if (res.type != RTN_UNICAST) { if (res.type != RTN_LOCAL || !accept_local) @@ -253,10 +253,10 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, goto last_resort; if (rpf == 1) goto e_rpf; - fl.flowi_oif = dev->ifindex; + fl4.flowi4_oif = dev->ifindex; ret = 0; - if (fib_lookup(net, &fl.u.ip4, &res) == 0) { + if (fib_lookup(net, &fl4, &res) == 0) { if (res.type == RTN_UNICAST) { *spec_dst = FIB_RES_PREFSRC(res); ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; @@ -796,11 +796,11 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) { struct fib_result res; - struct flowi fl = { - .flowi_mark = frn->fl_mark, - .fl4_dst = frn->fl_addr, - .fl4_tos = frn->fl_tos, - .fl4_scope = frn->fl_scope, + struct flowi4 fl4 = { + .flowi4_mark = frn->fl_mark, + .daddr = frn->fl_addr, + .flowi4_tos = frn->fl_tos, + .flowi4_scope = frn->fl_scope, }; #ifdef CONFIG_IP_MULTIPLE_TABLES @@ -813,7 +813,7 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) frn->tb_id = tb->tb_id; rcu_read_lock(); - frn->err = 
fib_table_lookup(tb, &fl.u.ip4, &res, FIB_LOOKUP_NOREF); + frn->err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF); if (!frn->err) { frn->prefixlen = res.prefixlen; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 0c63c4a..a53bb1b 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -106,14 +106,15 @@ errout: static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) { struct fib4_rule *r = (struct fib4_rule *) rule; - __be32 daddr = fl->fl4_dst; - __be32 saddr = fl->fl4_src; + struct flowi4 *fl4 = &fl->u.ip4; + __be32 daddr = fl4->daddr; + __be32 saddr = fl4->saddr; if (((saddr ^ r->src) & r->srcmask) || ((daddr ^ r->dst) & r->dstmask)) return 0; - if (r->tos && (r->tos != fl->fl4_tos)) + if (r->tos && (r->tos != fl4->flowi4_tos)) return 0; return 1; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index a721013..622ac4c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -560,16 +560,16 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, } rcu_read_lock(); { - struct flowi fl = { - .fl4_dst = nh->nh_gw, - .fl4_scope = cfg->fc_scope + 1, - .flowi_oif = nh->nh_oif, + struct flowi4 fl4 = { + .daddr = nh->nh_gw, + .flowi4_scope = cfg->fc_scope + 1, + .flowi4_oif = nh->nh_oif, }; /* It is not necessary, but requires a bit of thinking */ - if (fl.fl4_scope < RT_SCOPE_LINK) - fl.fl4_scope = RT_SCOPE_LINK; - err = fib_lookup(net, &fl.u.ip4, &res); + if (fl4.flowi4_scope < RT_SCOPE_LINK) + fl4.flowi4_scope = RT_SCOPE_LINK; + err = fib_lookup(net, &fl4, &res); if (err) { rcu_read_unlock(); return err; -- cgit v1.1 From da91981bee8de20bcd06ee0dbddd53d62d23b1bd Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 12 Mar 2011 02:04:50 -0500 Subject: ipv4: Use flowi4 in ipmr code. Signed-off-by: David S. 
Miller --- net/ipv4/ipmr.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 3b72b0a..1f62eae 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -148,14 +148,15 @@ static struct mr_table *ipmr_get_table(struct net *net, u32 id) return NULL; } -static int ipmr_fib_lookup(struct net *net, struct flowi *flp, +static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, struct mr_table **mrt) { struct ipmr_result res; struct fib_lookup_arg arg = { .result = &res, }; int err; - err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg); + err = fib_rules_lookup(net->ipv4.mr_rules_ops, + flowi4_to_flowi(flp4), 0, &arg); if (err < 0) return err; *mrt = res.mrt; @@ -283,7 +284,7 @@ static struct mr_table *ipmr_get_table(struct net *net, u32 id) return net->ipv4.mrt; } -static int ipmr_fib_lookup(struct net *net, struct flowi *flp, +static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, struct mr_table **mrt) { *mrt = net->ipv4.mrt; @@ -435,14 +436,14 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { struct net *net = dev_net(dev); struct mr_table *mrt; - struct flowi fl = { - .flowi_oif = dev->ifindex, - .flowi_iif = skb->skb_iif, - .flowi_mark = skb->mark, + struct flowi4 fl4 = { + .flowi4_oif = dev->ifindex, + .flowi4_iif = skb->skb_iif, + .flowi4_mark = skb->mark, }; int err; - err = ipmr_fib_lookup(net, &fl, &mrt); + err = ipmr_fib_lookup(net, &fl4, &mrt); if (err < 0) { kfree_skb(skb); return err; @@ -1789,18 +1790,18 @@ dont_forward: static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct rtable *rt) { - struct flowi fl = { - .fl4_dst = rt->rt_key_dst, - .fl4_src = rt->rt_key_src, - .fl4_tos = rt->rt_tos, - .flowi_oif = rt->rt_oif, - .flowi_iif = rt->rt_iif, - .flowi_mark = rt->rt_mark, + struct flowi4 fl4 = { + .daddr = rt->rt_key_dst, + .saddr = rt->rt_key_src, + .flowi4_tos = rt->rt_tos, + 
.flowi4_oif = rt->rt_oif, + .flowi4_iif = rt->rt_iif, + .flowi4_mark = rt->rt_mark, }; struct mr_table *mrt; int err; - err = ipmr_fib_lookup(net, &fl, &mrt); + err = ipmr_fib_lookup(net, &fl4, &mrt); if (err) return ERR_PTR(err); return mrt; -- cgit v1.1 From 3073e5ab927487328d48cd74faac0f95a8336867 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 12 Mar 2011 02:06:33 -0500 Subject: netfilter: Use flowi4 in nf_nat_standalone.c Signed-off-by: David S. Miller --- net/ipv4/netfilter/nf_nat_standalone.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 1f3c695..963320b 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -31,6 +31,7 @@ #ifdef CONFIG_XFRM static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) { + struct flowi4 *fl4 = &fl->u.ip4; const struct nf_conn *ct; const struct nf_conntrack_tuple *t; enum ip_conntrack_info ctinfo; @@ -49,25 +50,25 @@ static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) statusbit = IPS_SRC_NAT; if (ct->status & statusbit) { - fl->fl4_dst = t->dst.u3.ip; + fl4->daddr = t->dst.u3.ip; if (t->dst.protonum == IPPROTO_TCP || t->dst.protonum == IPPROTO_UDP || t->dst.protonum == IPPROTO_UDPLITE || t->dst.protonum == IPPROTO_DCCP || t->dst.protonum == IPPROTO_SCTP) - fl->fl4_dport = t->dst.u.tcp.port; + fl4->uli.ports.dport = t->dst.u.tcp.port; } statusbit ^= IPS_NAT_MASK; if (ct->status & statusbit) { - fl->fl4_src = t->src.u3.ip; + fl4->saddr = t->src.u3.ip; if (t->dst.protonum == IPPROTO_TCP || t->dst.protonum == IPPROTO_UDP || t->dst.protonum == IPPROTO_UDPLITE || t->dst.protonum == IPPROTO_DCCP || t->dst.protonum == IPPROTO_SCTP) - fl->fl4_sport = t->src.u.tcp.port; + fl4->uli.ports.sport = t->src.u.tcp.port; } } #endif -- cgit v1.1 From b6f21b268026165a239edb5f4d7120eacf7bc593 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Sat, 12 Mar 2011 02:09:18 -0500 Subject: ipv4: Use flowi4 in UDP Signed-off-by: David S. Miller --- net/ipv4/udp.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 25c0807..91cba3c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -770,7 +770,7 @@ static int udp_push_pending_frames(struct sock *sk) { struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); - struct flowi *fl = &inet->cork.fl; + struct flowi4 *fl4 = &inet->cork.fl.u.ip4; struct sk_buff *skb; int err = 0; @@ -778,7 +778,7 @@ static int udp_push_pending_frames(struct sock *sk) if (!skb) goto out; - err = udp_send_skb(skb, fl->fl4_dst, fl->fl4_dport); + err = udp_send_skb(skb, fl4->daddr, fl4->uli.ports.dport); out: up->len = 0; @@ -791,6 +791,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, { struct inet_sock *inet = inet_sk(sk); struct udp_sock *up = udp_sk(sk); + struct flowi4 *fl4; int ulen = len; struct ipcm_cookie ipc; struct rtable *rt = NULL; @@ -972,10 +973,11 @@ back_from_confirm: /* * Now cork the socket to pend data. */ - inet->cork.fl.fl4_dst = daddr; - inet->cork.fl.fl4_dport = dport; - inet->cork.fl.fl4_src = saddr; - inet->cork.fl.fl4_sport = inet->inet_sport; + fl4 = &inet->cork.fl.u.ip4; + fl4->daddr = daddr; + fl4->saddr = saddr; + fl4->uli.ports.dport = dport; + fl4->uli.ports.sport = inet->inet_sport; up->pending = AF_INET; do_append_data: -- cgit v1.1 From 5a49d0e04d62ab3f85aea4d15e0ca8be9b0ee89b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 12 Mar 2011 02:14:05 -0500 Subject: netfilter: Use flowi4 and flowi6 in nf_conntrack_h323_main Signed-off-by: David S. 
Miller --- net/netfilter/nf_conntrack_h323_main.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index b969025..533a183 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -714,7 +714,6 @@ static int callforward_do_filter(const union nf_inet_addr *src, u_int8_t family) { const struct nf_afinfo *afinfo; - struct flowi fl1, fl2; int ret = 0; /* rcu_read_lock()ed by nf_hook_slow() */ @@ -722,17 +721,20 @@ static int callforward_do_filter(const union nf_inet_addr *src, if (!afinfo) return 0; - memset(&fl1, 0, sizeof(fl1)); - memset(&fl2, 0, sizeof(fl2)); - switch (family) { case AF_INET: { + struct flowi4 fl1, fl2; struct rtable *rt1, *rt2; - fl1.fl4_dst = src->ip; - fl2.fl4_dst = dst->ip; - if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) { - if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) { + memset(&fl1, 0, sizeof(fl1)); + fl1.daddr = src->ip; + + memset(&fl2, 0, sizeof(fl2)); + fl2.daddr = dst->ip; + if (!afinfo->route((struct dst_entry **)&rt1, + flowi4_to_flowi(&fl1))) { + if (!afinfo->route((struct dst_entry **)&rt2, + flowi4_to_flowi(&fl2))) { if (rt1->rt_gateway == rt2->rt_gateway && rt1->dst.dev == rt2->dst.dev) ret = 1; @@ -745,12 +747,18 @@ static int callforward_do_filter(const union nf_inet_addr *src, #if defined(CONFIG_NF_CONNTRACK_IPV6) || \ defined(CONFIG_NF_CONNTRACK_IPV6_MODULE) case AF_INET6: { + struct flowi6 fl1, fl2; struct rt6_info *rt1, *rt2; - memcpy(&fl1.fl6_dst, src, sizeof(fl1.fl6_dst)); - memcpy(&fl2.fl6_dst, dst, sizeof(fl2.fl6_dst)); - if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) { - if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) { + memset(&fl1, 0, sizeof(fl1)); + ipv6_addr_copy(&fl1.daddr, &src->in6); + + memset(&fl2, 0, sizeof(fl2)); + ipv6_addr_copy(&fl2.daddr, &dst->in6); + if (!afinfo->route((struct dst_entry 
**)&rt1, + flowi6_to_flowi(&fl1))) { + if (!afinfo->route((struct dst_entry **)&rt2, + flowi6_to_flowi(&fl2))) { if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, sizeof(rt1->rt6i_gateway)) && rt1->dst.dev == rt2->dst.dev) -- cgit v1.1 From a1bbb0e698b4ba18c6356564923bb395bed0e576 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 12 Mar 2011 02:16:48 -0500 Subject: netfilter: Use flowi4 and flowi6 in xt_TCPMSS Signed-off-by: David S. Miller --- net/netfilter/xt_TCPMSS.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index eb81c38..6e6b46c 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -148,16 +148,21 @@ tcpmss_mangle_packet(struct sk_buff *skb, static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, unsigned int family) { - struct flowi fl = {}; + struct flowi fl; const struct nf_afinfo *ai; struct rtable *rt = NULL; u_int32_t mtu = ~0U; - if (family == PF_INET) - fl.fl4_dst = ip_hdr(skb)->saddr; - else - fl.fl6_dst = ipv6_hdr(skb)->saddr; + if (family == PF_INET) { + struct flowi4 *fl4 = &fl.u.ip4; + memset(fl4, 0, sizeof(*fl4)); + fl4->daddr = ip_hdr(skb)->saddr; + } else { + struct flowi6 *fl6 = &fl.u.ip6; + memset(fl6, 0, sizeof(*fl6)); + ipv6_addr_copy(&fl6->daddr, &ipv6_hdr(skb)->saddr); + } rcu_read_lock(); ai = nf_get_afinfo(family); if (ai != NULL) -- cgit v1.1 From 7e1dc7b6f709dfc1a9ab4b320dbe723f45992693 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 12 Mar 2011 02:42:11 -0500 Subject: net: Use flowi4 and flowi6 in xfrm layer. Signed-off-by: David S. 
Miller --- net/ipv4/xfrm4_policy.c | 46 ++++++++++++++++++++++++---------------------- net/ipv4/xfrm4_state.c | 14 ++++++++------ net/ipv6/xfrm6_policy.c | 39 +++++++++++++++++++++------------------ net/ipv6/xfrm6_state.c | 14 ++++++++------ net/xfrm/xfrm_policy.c | 28 ++++++++++++++++------------ 5 files changed, 77 insertions(+), 64 deletions(-) (limited to 'net') diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index b111f468..30b312c 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -56,7 +56,7 @@ static int xfrm4_get_saddr(struct net *net, static int xfrm4_get_tos(const struct flowi *fl) { - return IPTOS_RT_MASK & fl->fl4_tos; /* Strip ECN bits */ + return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos; /* Strip ECN bits */ } static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, @@ -69,13 +69,14 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl) { struct rtable *rt = (struct rtable *)xdst->route; + const struct flowi4 *fl4 = &fl->u.ip4; - rt->rt_key_dst = fl->fl4_dst; - rt->rt_key_src = fl->fl4_src; - rt->rt_tos = fl->fl4_tos; - rt->rt_iif = fl->flowi_iif; - rt->rt_oif = fl->flowi_oif; - rt->rt_mark = fl->flowi_mark; + rt->rt_key_dst = fl4->daddr; + rt->rt_key_src = fl4->saddr; + rt->rt_tos = fl4->flowi4_tos; + rt->rt_iif = fl4->flowi4_iif; + rt->rt_oif = fl4->flowi4_oif; + rt->rt_mark = fl4->flowi4_mark; xdst->u.dst.dev = dev; dev_hold(dev); @@ -102,9 +103,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) { struct iphdr *iph = ip_hdr(skb); u8 *xprth = skb_network_header(skb) + iph->ihl * 4; + struct flowi4 *fl4 = &fl->u.ip4; - memset(fl, 0, sizeof(struct flowi)); - fl->flowi_mark = skb->mark; + memset(fl4, 0, sizeof(struct flowi4)); + fl4->flowi4_mark = skb->mark; if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { switch (iph->protocol) { @@ -117,8 +119,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, 
xprth + 4 - skb->data)) { __be16 *ports = (__be16 *)xprth; - fl->fl4_sport = ports[!!reverse]; - fl->fl4_dport = ports[!reverse]; + fl4->uli.ports.sport = ports[!!reverse]; + fl4->uli.ports.dport = ports[!reverse]; } break; @@ -126,8 +128,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) if (pskb_may_pull(skb, xprth + 2 - skb->data)) { u8 *icmp = xprth; - fl->fl4_icmp_type = icmp[0]; - fl->fl4_icmp_code = icmp[1]; + fl4->uli.icmpt.type = icmp[0]; + fl4->uli.icmpt.code = icmp[1]; } break; @@ -135,7 +137,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) if (pskb_may_pull(skb, xprth + 4 - skb->data)) { __be32 *ehdr = (__be32 *)xprth; - fl->fl4_ipsec_spi = ehdr[0]; + fl4->uli.spi = ehdr[0]; } break; @@ -143,7 +145,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) if (pskb_may_pull(skb, xprth + 8 - skb->data)) { __be32 *ah_hdr = (__be32*)xprth; - fl->fl4_ipsec_spi = ah_hdr[1]; + fl4->uli.spi = ah_hdr[1]; } break; @@ -151,7 +153,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) if (pskb_may_pull(skb, xprth + 4 - skb->data)) { __be16 *ipcomp_hdr = (__be16 *)xprth; - fl->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); + fl4->uli.spi = htonl(ntohs(ipcomp_hdr[1])); } break; @@ -163,20 +165,20 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) if (greflags[0] & GRE_KEY) { if (greflags[0] & GRE_CSUM) gre_hdr++; - fl->fl4_gre_key = gre_hdr[1]; + fl4->uli.gre_key = gre_hdr[1]; } } break; default: - fl->fl4_ipsec_spi = 0; + fl4->uli.spi = 0; break; } } - fl->flowi_proto = iph->protocol; - fl->fl4_dst = reverse ? iph->saddr : iph->daddr; - fl->fl4_src = reverse ? iph->daddr : iph->saddr; - fl->fl4_tos = iph->tos; + fl4->flowi4_proto = iph->protocol; + fl4->daddr = reverse ? iph->saddr : iph->daddr; + fl4->saddr = reverse ? 
iph->daddr : iph->saddr; + fl4->flowi4_tos = iph->tos; } static inline int xfrm4_garbage_collect(struct dst_ops *ops) diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index d8d5419..1717c64 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -23,17 +23,19 @@ static int xfrm4_init_flags(struct xfrm_state *x) static void __xfrm4_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl) { - sel->daddr.a4 = fl->fl4_dst; - sel->saddr.a4 = fl->fl4_src; - sel->dport = xfrm_flowi_dport(fl, &fl->u.ip4.uli); + const struct flowi4 *fl4 = &fl->u.ip4; + + sel->daddr.a4 = fl4->daddr; + sel->saddr.a4 = fl4->saddr; + sel->dport = xfrm_flowi_dport(fl, &fl4->uli); sel->dport_mask = htons(0xffff); - sel->sport = xfrm_flowi_sport(fl, &fl->u.ip4.uli); + sel->sport = xfrm_flowi_sport(fl, &fl4->uli); sel->sport_mask = htons(0xffff); sel->family = AF_INET; sel->prefixlen_d = 32; sel->prefixlen_s = 32; - sel->proto = fl->flowi_proto; - sel->ifindex = fl->flowi_oif; + sel->proto = fl4->flowi4_proto; + sel->ifindex = fl4->flowi4_oif; } static void diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 213c759..254aa6d 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -30,15 +30,17 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, const xfrm_address_t *saddr, const xfrm_address_t *daddr) { - struct flowi fl = {}; + struct flowi6 fl6; struct dst_entry *dst; int err; - memcpy(&fl.fl6_dst, daddr, sizeof(fl.fl6_dst)); + memset(&fl6, 0, sizeof(fl6)); + memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr)); if (saddr) - memcpy(&fl.fl6_src, saddr, sizeof(fl.fl6_src)); + memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr)); - dst = ip6_route_output(net, NULL, &fl); + dst = ip6_route_output(net, NULL, + flowi6_to_flowi(&fl6)); err = dst->error; if (dst->error) { @@ -120,6 +122,7 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, static inline void _decode_session6(struct sk_buff *skb, struct flowi *fl, int 
reverse) { + struct flowi6 *fl6 = &fl->u.ip6; int onlyproto = 0; u16 offset = skb_network_header_len(skb); struct ipv6hdr *hdr = ipv6_hdr(skb); @@ -127,11 +130,11 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) const unsigned char *nh = skb_network_header(skb); u8 nexthdr = nh[IP6CB(skb)->nhoff]; - memset(fl, 0, sizeof(struct flowi)); - fl->flowi_mark = skb->mark; + memset(fl6, 0, sizeof(struct flowi6)); + fl6->flowi6_mark = skb->mark; - ipv6_addr_copy(&fl->fl6_dst, reverse ? &hdr->saddr : &hdr->daddr); - ipv6_addr_copy(&fl->fl6_src, reverse ? &hdr->daddr : &hdr->saddr); + ipv6_addr_copy(&fl6->daddr, reverse ? &hdr->saddr : &hdr->daddr); + ipv6_addr_copy(&fl6->saddr, reverse ? &hdr->daddr : &hdr->saddr); while (nh + offset + 1 < skb->data || pskb_may_pull(skb, nh + offset + 1 - skb->data)) { @@ -158,20 +161,20 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, nh + offset + 4 - skb->data))) { __be16 *ports = (__be16 *)exthdr; - fl->fl6_sport = ports[!!reverse]; - fl->fl6_dport = ports[!reverse]; + fl6->uli.ports.sport = ports[!!reverse]; + fl6->uli.ports.dport = ports[!reverse]; } - fl->flowi_proto = nexthdr; + fl6->flowi6_proto = nexthdr; return; case IPPROTO_ICMPV6: if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) { u8 *icmp = (u8 *)exthdr; - fl->fl6_icmp_type = icmp[0]; - fl->fl6_icmp_code = icmp[1]; + fl6->uli.icmpt.type = icmp[0]; + fl6->uli.icmpt.code = icmp[1]; } - fl->flowi_proto = nexthdr; + fl6->flowi6_proto = nexthdr; return; #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) @@ -180,9 +183,9 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) struct ip6_mh *mh; mh = (struct ip6_mh *)exthdr; - fl->fl6_mh_type = mh->ip6mh_type; + fl6->uli.mht.type = mh->ip6mh_type; } - fl->flowi_proto = nexthdr; + fl6->flowi6_proto = nexthdr; return; #endif @@ -191,8 +194,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) case 
IPPROTO_ESP: case IPPROTO_COMP: default: - fl->fl6_ipsec_spi = 0; - fl->flowi_proto = nexthdr; + fl6->uli.spi = 0; + fl6->flowi6_proto = nexthdr; return; } } diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index b456533..afe941e 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -22,19 +22,21 @@ static void __xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl) { + const struct flowi6 *fl6 = &fl->u.ip6; + /* Initialize temporary selector matching only * to current session. */ - ipv6_addr_copy((struct in6_addr *)&sel->daddr, &fl->fl6_dst); - ipv6_addr_copy((struct in6_addr *)&sel->saddr, &fl->fl6_src); - sel->dport = xfrm_flowi_dport(fl, &fl->u.ip6.uli); + ipv6_addr_copy((struct in6_addr *)&sel->daddr, &fl6->daddr); + ipv6_addr_copy((struct in6_addr *)&sel->saddr, &fl6->saddr); + sel->dport = xfrm_flowi_dport(fl, &fl6->uli); sel->dport_mask = htons(0xffff); - sel->sport = xfrm_flowi_sport(fl, &fl->u.ip6.uli); + sel->sport = xfrm_flowi_sport(fl, &fl6->uli); sel->sport_mask = htons(0xffff); sel->family = AF_INET6; sel->prefixlen_d = 128; sel->prefixlen_s = 128; - sel->proto = fl->flowi_proto; - sel->ifindex = fl->flowi_oif; + sel->proto = fl6->flowi6_proto; + sel->ifindex = fl6->flowi6_oif; } static void diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 2ecd18a..1ba0258 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -59,23 +59,27 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, static inline int __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) { - return addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) && - addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) && - !((xfrm_flowi_dport(fl, &fl->u.ip4.uli) ^ sel->dport) & sel->dport_mask) && - !((xfrm_flowi_sport(fl, &fl->u.ip4.uli) ^ sel->sport) & sel->sport_mask) && - (fl->flowi_proto == sel->proto || !sel->proto) && - (fl->flowi_oif == sel->ifindex || !sel->ifindex); 
+ const struct flowi4 *fl4 = &fl->u.ip4; + + return addr_match(&fl4->daddr, &sel->daddr, sel->prefixlen_d) && + addr_match(&fl4->saddr, &sel->saddr, sel->prefixlen_s) && + !((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) && + !((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) && + (fl4->flowi4_proto == sel->proto || !sel->proto) && + (fl4->flowi4_oif == sel->ifindex || !sel->ifindex); } static inline int __xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) { - return addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) && - addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) && - !((xfrm_flowi_dport(fl, &fl->u.ip6.uli) ^ sel->dport) & sel->dport_mask) && - !((xfrm_flowi_sport(fl, &fl->u.ip6.uli) ^ sel->sport) & sel->sport_mask) && - (fl->flowi_proto == sel->proto || !sel->proto) && - (fl->flowi_oif == sel->ifindex || !sel->ifindex); + const struct flowi6 *fl6 = &fl->u.ip6; + + return addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) && + addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) && + !((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) && + !((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) && + (fl6->flowi6_proto == sel->proto || !sel->proto) && + (fl6->flowi6_oif == sel->ifindex || !sel->ifindex); } int xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl, -- cgit v1.1 From f42454d632753d71ea1a2df09be7bbda32b6372d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 12 Mar 2011 02:44:16 -0500 Subject: ipv4: Kill fib_semantic_match declaration from fib_lookup.h This function no longer exists. Signed-off-by: David S. 
Miller --- net/ipv4/fib_lookup.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 84db2da..4ec3238 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -25,9 +25,6 @@ static inline void fib_alias_accessed(struct fib_alias *fa) } /* Exported by fib_semantics.c */ -extern int fib_semantic_match(struct fib_table *tb, struct list_head *head, - const struct flowi *flp, - struct fib_result *res, int prefixlen, int fib_flags); extern void fib_release_info(struct fib_info *); extern struct fib_info *fib_create_info(struct fib_config *cfg); extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); -- cgit v1.1 From 9cce96df5b76691712dba22e83ff5efe900361e1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 12 Mar 2011 03:00:33 -0500 Subject: net: Put fl4_* macros to struct flowi4 and use them again. Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 4 ++-- net/ipv4/icmp.c | 4 ++-- net/ipv4/inet_connection_sock.c | 4 ++-- net/ipv4/ip_output.c | 4 ++-- net/ipv4/netfilter/nf_nat_standalone.c | 4 ++-- net/ipv4/raw.c | 4 ++-- net/ipv4/syncookies.c | 4 ++-- net/ipv4/udp.c | 10 +++++----- net/ipv4/xfrm4_policy.c | 18 +++++++++--------- net/sctp/protocol.c | 8 ++++---- 10 files changed, 32 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index be98470..ae451c6 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -471,8 +471,8 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, .saddr = ip_hdr(skb)->daddr, .flowi4_tos = RT_CONN_FLAGS(sk), .flowi4_proto = sk->sk_protocol, - .uli.ports.sport = dccp_hdr(skb)->dccph_dport, - .uli.ports.dport = dccp_hdr(skb)->dccph_sport, + .fl4_sport = dccp_hdr(skb)->dccph_dport, + .fl4_dport = dccp_hdr(skb)->dccph_sport, }; security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 8eca3c2..a91dc16 100644 --- 
a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -384,8 +384,8 @@ static struct rtable *icmp_route_lookup(struct net *net, struct sk_buff *skb_in, .saddr = saddr, .flowi4_tos = RT_TOS(tos), .flowi4_proto = IPPROTO_ICMP, - .uli.icmpt.type = type, - .uli.icmpt.code = code, + .fl4_icmp_type = type, + .fl4_icmp_code = code, }; struct rtable *rt, *rt2; int err; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index beecc12..6c0b7f4 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -365,8 +365,8 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, .flowi4_tos = RT_CONN_FLAGS(sk), .flowi4_proto = sk->sk_protocol, .flowi4_flags = inet_sk_flowi_flags(sk), - .uli.ports.sport = inet_sk(sk)->inet_sport, - .uli.ports.dport = ireq->rmt_port, + .fl4_sport = inet_sk(sk)->inet_sport, + .fl4_dport = ireq->rmt_port, }; struct net *net = sock_net(sk); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 2b9cc40..67f241b 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1479,8 +1479,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar .daddr = daddr, .saddr = rt->rt_spec_dst, .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), - .uli.ports.sport = tcp_hdr(skb)->dest, - .uli.ports.dport = tcp_hdr(skb)->source, + .fl4_sport = tcp_hdr(skb)->dest, + .fl4_dport = tcp_hdr(skb)->source, .flowi4_proto = sk->sk_protocol, .flowi4_flags = ip_reply_arg_flowi_flags(arg), }; diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 963320b..7317bdf 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -56,7 +56,7 @@ static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) t->dst.protonum == IPPROTO_UDPLITE || t->dst.protonum == IPPROTO_DCCP || t->dst.protonum == IPPROTO_SCTP) - fl4->uli.ports.dport = t->dst.u.tcp.port; + fl4->fl4_dport = t->dst.u.tcp.port; } statusbit ^= IPS_NAT_MASK; @@ -68,7 
+68,7 @@ static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) t->dst.protonum == IPPROTO_UDPLITE || t->dst.protonum == IPPROTO_DCCP || t->dst.protonum == IPPROTO_SCTP) - fl4->uli.ports.sport = t->src.u.tcp.port; + fl4->fl4_sport = t->src.u.tcp.port; } } #endif diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 452e178..e837ffd 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -433,8 +433,8 @@ static int raw_probe_proto_opt(struct flowi4 *fl4, struct msghdr *msg) code = iov->iov_base; if (type && code) { - if (get_user(fl4->uli.icmpt.type, type) || - get_user(fl4->uli.icmpt.code, code)) + if (get_user(fl4->fl4_icmp_type, type) || + get_user(fl4->fl4_icmp_code, code)) return -EFAULT; probed = 1; } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index e3b5b75..8b44c6d 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -353,8 +353,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, .flowi4_tos = RT_CONN_FLAGS(sk), .flowi4_proto = IPPROTO_TCP, .flowi4_flags = inet_sk_flowi_flags(sk), - .uli.ports.sport = th->dest, - .uli.ports.dport = th->source, + .fl4_sport = th->dest, + .fl4_dport = th->source, }; security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 91cba3c..588f47a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -778,7 +778,7 @@ static int udp_push_pending_frames(struct sock *sk) if (!skb) goto out; - err = udp_send_skb(skb, fl4->daddr, fl4->uli.ports.dport); + err = udp_send_skb(skb, fl4->daddr, fl4->fl4_dport); out: up->len = 0; @@ -918,8 +918,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, .flowi4_proto = sk->sk_protocol, .flowi4_flags = (inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP), - .uli.ports.sport = inet->inet_sport, - .uli.ports.dport = dport, + .fl4_sport = inet->inet_sport, + .fl4_dport = dport, }; struct net *net = sock_net(sk); @@ -976,8 +976,8 @@ 
back_from_confirm: fl4 = &inet->cork.fl.u.ip4; fl4->daddr = daddr; fl4->saddr = saddr; - fl4->uli.ports.dport = dport; - fl4->uli.ports.sport = inet->inet_sport; + fl4->fl4_dport = dport; + fl4->fl4_sport = inet->inet_sport; up->pending = AF_INET; do_append_data: diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 30b312c..13e0e7f 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -119,8 +119,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, xprth + 4 - skb->data)) { __be16 *ports = (__be16 *)xprth; - fl4->uli.ports.sport = ports[!!reverse]; - fl4->uli.ports.dport = ports[!reverse]; + fl4->fl4_sport = ports[!!reverse]; + fl4->fl4_dport = ports[!reverse]; } break; @@ -128,8 +128,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) if (pskb_may_pull(skb, xprth + 2 - skb->data)) { u8 *icmp = xprth; - fl4->uli.icmpt.type = icmp[0]; - fl4->uli.icmpt.code = icmp[1]; + fl4->fl4_icmp_type = icmp[0]; + fl4->fl4_icmp_code = icmp[1]; } break; @@ -137,7 +137,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) if (pskb_may_pull(skb, xprth + 4 - skb->data)) { __be32 *ehdr = (__be32 *)xprth; - fl4->uli.spi = ehdr[0]; + fl4->fl4_ipsec_spi = ehdr[0]; } break; @@ -145,7 +145,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) if (pskb_may_pull(skb, xprth + 8 - skb->data)) { __be32 *ah_hdr = (__be32*)xprth; - fl4->uli.spi = ah_hdr[1]; + fl4->fl4_ipsec_spi = ah_hdr[1]; } break; @@ -153,7 +153,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) if (pskb_may_pull(skb, xprth + 4 - skb->data)) { __be16 *ipcomp_hdr = (__be16 *)xprth; - fl4->uli.spi = htonl(ntohs(ipcomp_hdr[1])); + fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); } break; @@ -165,13 +165,13 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) if (greflags[0] & GRE_KEY) { if (greflags[0] & GRE_CSUM) gre_hdr++; - fl4->uli.gre_key = gre_hdr[1]; 
+ fl4->fl4_gre_key = gre_hdr[1]; } } break; default: - fl4->uli.spi = 0; + fl4->fl4_ipsec_spi = 0; break; } } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 31c0456..152976e 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -476,16 +476,16 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, memset(&fl4, 0x0, sizeof(struct flowi4)); fl4.daddr = daddr->v4.sin_addr.s_addr; - fl4.uli.ports.dport = daddr->v4.sin_port; + fl4.fl4_dport = daddr->v4.sin_port; fl4.flowi4_proto = IPPROTO_SCTP; if (asoc) { fl4.flowi4_tos = RT_CONN_FLAGS(asoc->base.sk); fl4.flowi4_oif = asoc->base.sk->sk_bound_dev_if; - fl4.uli.ports.sport = htons(asoc->base.bind_addr.port); + fl4.fl4_sport = htons(asoc->base.bind_addr.port); } if (saddr) { fl4.saddr = saddr->v4.sin_addr.s_addr; - fl4.uli.ports.sport = saddr->v4.sin_port; + fl4.fl4_sport = saddr->v4.sin_port; } SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ", @@ -534,7 +534,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, if ((laddr->state == SCTP_ADDR_SRC) && (AF_INET == laddr->a.sa.sa_family)) { fl4.saddr = laddr->a.v4.sin_addr.s_addr; - fl4.uli.ports.sport = laddr->a.v4.sin_port; + fl4.fl4_sport = laddr->a.v4.sin_port; rt = ip_route_output_key(&init_net, &fl4); if (!IS_ERR(rt)) { dst = &rt->dst; -- cgit v1.1 From 4c9483b2fb5d2548c3cc1fe03cdd4484ceeb5d1c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 12 Mar 2011 16:22:43 -0500 Subject: ipv6: Convert to use flowi6 where applicable. Signed-off-by: David S. 
Miller --- net/dccp/ipv6.c | 132 +++++++++++++++++++-------------------- net/ipv6/af_inet6.c | 32 +++++----- net/ipv6/datagram.c | 75 +++++++++++----------- net/ipv6/exthdrs.c | 12 ++-- net/ipv6/fib6_rules.c | 19 +++--- net/ipv6/icmp.c | 110 ++++++++++++++++---------------- net/ipv6/inet6_connection_sock.c | 60 +++++++++--------- net/ipv6/ip6_fib.c | 4 +- net/ipv6/ip6_flowlabel.c | 6 +- net/ipv6/ip6_output.c | 90 +++++++++++++------------- net/ipv6/ip6_tunnel.c | 50 +++++++-------- net/ipv6/ip6mr.c | 53 ++++++++-------- net/ipv6/ipv6_sockglue.c | 10 +-- net/ipv6/mcast.c | 12 ++-- net/ipv6/mip6.c | 13 ++-- net/ipv6/ndisc.c | 14 ++--- net/ipv6/netfilter.c | 18 +++--- net/ipv6/netfilter/ip6t_REJECT.c | 20 +++--- net/ipv6/raw.c | 79 ++++++++++++----------- net/ipv6/route.c | 96 ++++++++++++++-------------- net/ipv6/syncookies.c | 26 ++++---- net/ipv6/tcp_ipv6.c | 114 ++++++++++++++++----------------- net/ipv6/udp.c | 76 +++++++++++----------- net/ipv6/xfrm6_policy.c | 3 +- net/netfilter/ipvs/ip_vs_ctl.c | 10 ++- net/netfilter/ipvs/ip_vs_xmit.c | 14 ++--- net/netfilter/xt_TEE.c | 12 ++-- net/sctp/ipv6.c | 42 ++++++------- 28 files changed, 602 insertions(+), 600 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 2b351c6..8d26c12 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -147,22 +147,22 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, dst = __sk_dst_check(sk, np->dst_cookie); if (dst == NULL) { struct inet_sock *inet = inet_sk(sk); - struct flowi fl; + struct flowi6 fl6; /* BUGGG_FUTURE: Again, it is not clear how to handle rthdr case. Ignore this complexity for now. 
*/ - memset(&fl, 0, sizeof(fl)); - fl.flowi_proto = IPPROTO_DCCP; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.flowi_oif = sk->sk_bound_dev_if; - fl.fl6_dport = inet->inet_dport; - fl.fl6_sport = inet->inet_sport; - security_sk_classify_flow(sk, &fl); - - dst = ip6_dst_lookup_flow(sk, &fl, NULL, false); + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_DCCP; + ipv6_addr_copy(&fl6.daddr, &np->daddr); + ipv6_addr_copy(&fl6.saddr, &np->saddr); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.uli.ports.dport = inet->inet_dport; + fl6.uli.ports.sport = inet->inet_sport; + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + + dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false); if (IS_ERR(dst)) { sk->sk_err_soft = -PTR_ERR(dst); goto out; @@ -243,25 +243,25 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, struct sk_buff *skb; struct ipv6_txoptions *opt = NULL; struct in6_addr *final_p, final; - struct flowi fl; + struct flowi6 fl6; int err = -1; struct dst_entry *dst; - memset(&fl, 0, sizeof(fl)); - fl.flowi_proto = IPPROTO_DCCP; - ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); - ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); - fl.fl6_flowlabel = 0; - fl.flowi_oif = ireq6->iif; - fl.fl6_dport = inet_rsk(req)->rmt_port; - fl.fl6_sport = inet_rsk(req)->loc_port; - security_req_classify_flow(req, &fl); + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_DCCP; + ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr); + ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr); + fl6.flowlabel = 0; + fl6.flowi6_oif = ireq6->iif; + fl6.uli.ports.dport = inet_rsk(req)->rmt_port; + fl6.uli.ports.sport = inet_rsk(req)->loc_port; + security_req_classify_flow(req, flowi6_to_flowi(&fl6)); opt = np->opt; - final_p = fl6_update_dst(&fl, opt, &final); + final_p = fl6_update_dst(&fl6, opt, &final); - dst = ip6_dst_lookup_flow(sk, &fl, final_p, false); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); if 
(IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; @@ -275,8 +275,8 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, dh->dccph_checksum = dccp_v6_csum_finish(skb, &ireq6->loc_addr, &ireq6->rmt_addr); - ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); - err = ip6_xmit(sk, skb, &fl, opt); + ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr); + err = ip6_xmit(sk, skb, &fl6, opt); err = net_xmit_eval(err); } @@ -298,7 +298,7 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) { struct ipv6hdr *rxip6h; struct sk_buff *skb; - struct flowi fl; + struct flowi6 fl6; struct net *net = dev_net(skb_dst(rxskb)->dev); struct sock *ctl_sk = net->dccp.v6_ctl_sk; struct dst_entry *dst; @@ -317,21 +317,21 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) dccp_hdr(skb)->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr, &rxip6h->daddr); - memset(&fl, 0, sizeof(fl)); - ipv6_addr_copy(&fl.fl6_dst, &rxip6h->saddr); - ipv6_addr_copy(&fl.fl6_src, &rxip6h->daddr); + memset(&fl6, 0, sizeof(fl6)); + ipv6_addr_copy(&fl6.daddr, &rxip6h->saddr); + ipv6_addr_copy(&fl6.saddr, &rxip6h->daddr); - fl.flowi_proto = IPPROTO_DCCP; - fl.flowi_oif = inet6_iif(rxskb); - fl.fl6_dport = dccp_hdr(skb)->dccph_dport; - fl.fl6_sport = dccp_hdr(skb)->dccph_sport; - security_skb_classify_flow(rxskb, &fl); + fl6.flowi6_proto = IPPROTO_DCCP; + fl6.flowi6_oif = inet6_iif(rxskb); + fl6.uli.ports.dport = dccp_hdr(skb)->dccph_dport; + fl6.uli.ports.sport = dccp_hdr(skb)->dccph_sport; + security_skb_classify_flow(rxskb, flowi6_to_flowi(&fl6)); /* sk = NULL, but it is safe for now. RST socket required. 
*/ - dst = ip6_dst_lookup_flow(ctl_sk, &fl, NULL, false); + dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false); if (!IS_ERR(dst)) { skb_dst_set(skb, dst); - ip6_xmit(ctl_sk, skb, &fl, NULL); + ip6_xmit(ctl_sk, skb, &fl6, NULL); DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); return; @@ -527,19 +527,19 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, if (dst == NULL) { struct in6_addr *final_p, final; - struct flowi fl; - - memset(&fl, 0, sizeof(fl)); - fl.flowi_proto = IPPROTO_DCCP; - ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); - final_p = fl6_update_dst(&fl, opt, &final); - ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); - fl.flowi_oif = sk->sk_bound_dev_if; - fl.fl6_dport = inet_rsk(req)->rmt_port; - fl.fl6_sport = inet_rsk(req)->loc_port; - security_sk_classify_flow(sk, &fl); - - dst = ip6_dst_lookup_flow(sk, &fl, final_p, false); + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_DCCP; + ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr); + final_p = fl6_update_dst(&fl6, opt, &final); + ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.uli.ports.dport = inet_rsk(req)->rmt_port; + fl6.uli.ports.sport = inet_rsk(req)->loc_port; + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); if (IS_ERR(dst)) goto out; } @@ -859,7 +859,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct ipv6_pinfo *np = inet6_sk(sk); struct dccp_sock *dp = dccp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; - struct flowi fl; + struct flowi6 fl6; struct dst_entry *dst; int addr_type; int err; @@ -872,14 +872,14 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; - memset(&fl, 0, sizeof(fl)); + memset(&fl6, 0, sizeof(fl6)); if (np->sndflow) { - fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; - 
IP6_ECN_flow_init(fl.fl6_flowlabel); - if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) { + fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; + IP6_ECN_flow_init(fl6.flowlabel); + if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) { struct ip6_flowlabel *flowlabel; - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); @@ -916,7 +916,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, } ipv6_addr_copy(&np->daddr, &usin->sin6_addr); - np->flow_label = fl.fl6_flowlabel; + np->flow_label = fl6.flowlabel; /* * DCCP over IPv4 @@ -953,24 +953,24 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (!ipv6_addr_any(&np->rcv_saddr)) saddr = &np->rcv_saddr; - fl.flowi_proto = IPPROTO_DCCP; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, saddr ? saddr : &np->saddr); - fl.flowi_oif = sk->sk_bound_dev_if; - fl.fl6_dport = usin->sin6_port; - fl.fl6_sport = inet->inet_sport; - security_sk_classify_flow(sk, &fl); + fl6.flowi6_proto = IPPROTO_DCCP; + ipv6_addr_copy(&fl6.daddr, &np->daddr); + ipv6_addr_copy(&fl6.saddr, saddr ? 
saddr : &np->saddr); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.uli.ports.dport = usin->sin6_port; + fl6.uli.ports.sport = inet->inet_sport; + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl, np->opt, &final); + final_p = fl6_update_dst(&fl6, np->opt, &final); - dst = ip6_dst_lookup_flow(sk, &fl, final_p, true); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto failure; } if (saddr == NULL) { - saddr = &fl.fl6_src; + saddr = &fl6.saddr; ipv6_addr_copy(&np->rcv_saddr, saddr); } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 923febe..689eea6 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -652,22 +652,22 @@ int inet6_sk_rebuild_header(struct sock *sk) if (dst == NULL) { struct inet_sock *inet = inet_sk(sk); struct in6_addr *final_p, final; - struct flowi fl; - - memset(&fl, 0, sizeof(fl)); - fl.flowi_proto = sk->sk_protocol; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.fl6_flowlabel = np->flow_label; - fl.flowi_oif = sk->sk_bound_dev_if; - fl.flowi_mark = sk->sk_mark; - fl.fl6_dport = inet->inet_dport; - fl.fl6_sport = inet->inet_sport; - security_sk_classify_flow(sk, &fl); - - final_p = fl6_update_dst(&fl, np->opt, &final); - - dst = ip6_dst_lookup_flow(sk, &fl, final_p, false); + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = sk->sk_protocol; + ipv6_addr_copy(&fl6.daddr, &np->daddr); + ipv6_addr_copy(&fl6.saddr, &np->saddr); + fl6.flowlabel = np->flow_label; + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_mark = sk->sk_mark; + fl6.uli.ports.dport = inet->inet_dport; + fl6.uli.ports.sport = inet->inet_sport; + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + + final_p = fl6_update_dst(&fl6, np->opt, &final); + + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); if (IS_ERR(dst)) { sk->sk_route_caps = 0; sk->sk_err_soft = -PTR_ERR(dst); diff --git 
a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 07e03e6..04ae676 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -40,7 +40,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *daddr, *final_p, final; struct dst_entry *dst; - struct flowi fl; + struct flowi6 fl6; struct ip6_flowlabel *flowlabel = NULL; struct ipv6_txoptions *opt; int addr_type; @@ -59,11 +59,11 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; - memset(&fl, 0, sizeof(fl)); + memset(&fl6, 0, sizeof(fl6)); if (np->sndflow) { - fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; - if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; + if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { + flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); @@ -137,7 +137,7 @@ ipv4_connected: } ipv6_addr_copy(&np->daddr, daddr); - np->flow_label = fl.fl6_flowlabel; + np->flow_label = fl6.flowlabel; inet->inet_dport = usin->sin6_port; @@ -146,23 +146,23 @@ ipv4_connected: * destination cache for it. 
*/ - fl.flowi_proto = sk->sk_protocol; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.flowi_oif = sk->sk_bound_dev_if; - fl.flowi_mark = sk->sk_mark; - fl.fl6_dport = inet->inet_dport; - fl.fl6_sport = inet->inet_sport; + fl6.flowi6_proto = sk->sk_protocol; + ipv6_addr_copy(&fl6.daddr, &np->daddr); + ipv6_addr_copy(&fl6.saddr, &np->saddr); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_mark = sk->sk_mark; + fl6.uli.ports.dport = inet->inet_dport; + fl6.uli.ports.sport = inet->inet_sport; - if (!fl.flowi_oif && (addr_type&IPV6_ADDR_MULTICAST)) - fl.flowi_oif = np->mcast_oif; + if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST)) + fl6.flowi6_oif = np->mcast_oif; - security_sk_classify_flow(sk, &fl); + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); opt = flowlabel ? flowlabel->opt : np->opt; - final_p = fl6_update_dst(&fl, opt, &final); + final_p = fl6_update_dst(&fl6, opt, &final); - dst = ip6_dst_lookup_flow(sk, &fl, final_p, true); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); err = 0; if (IS_ERR(dst)) { err = PTR_ERR(dst); @@ -172,20 +172,20 @@ ipv4_connected: /* source address lookup done in ip6_dst_lookup */ if (ipv6_addr_any(&np->saddr)) - ipv6_addr_copy(&np->saddr, &fl.fl6_src); + ipv6_addr_copy(&np->saddr, &fl6.saddr); if (ipv6_addr_any(&np->rcv_saddr)) { - ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src); + ipv6_addr_copy(&np->rcv_saddr, &fl6.saddr); inet->inet_rcv_saddr = LOOPBACK4_IPV6; if (sk->sk_prot->rehash) sk->sk_prot->rehash(sk); } ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ? + ipv6_addr_equal(&fl6.daddr, &np->daddr) ? &np->daddr : NULL, #ifdef CONFIG_IPV6_SUBTREES - ipv6_addr_equal(&fl.fl6_src, &np->saddr) ? + ipv6_addr_equal(&fl6.saddr, &np->saddr) ? 
&np->saddr : #endif NULL); @@ -231,7 +231,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, kfree_skb(skb); } -void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info) +void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info) { struct ipv6_pinfo *np = inet6_sk(sk); struct sock_exterr_skb *serr; @@ -250,7 +250,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info) skb_put(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); iph = ipv6_hdr(skb); - ipv6_addr_copy(&iph->daddr, &fl->fl6_dst); + ipv6_addr_copy(&iph->daddr, &fl6->daddr); serr = SKB_EXT_ERR(skb); serr->ee.ee_errno = err; @@ -261,7 +261,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info) serr->ee.ee_info = info; serr->ee.ee_data = 0; serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb); - serr->port = fl->fl6_dport; + serr->port = fl6->uli.ports.dport; __skb_pull(skb, skb_tail_pointer(skb) - skb->data); skb_reset_transport_header(skb); @@ -270,7 +270,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info) kfree_skb(skb); } -void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu) +void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6hdr *iph; @@ -287,7 +287,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu) skb_put(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); iph = ipv6_hdr(skb); - ipv6_addr_copy(&iph->daddr, &fl->fl6_dst); + ipv6_addr_copy(&iph->daddr, &fl6->daddr); mtu_info = IP6CBMTU(skb); if (!mtu_info) { @@ -299,7 +299,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu) mtu_info->ip6m_addr.sin6_family = AF_INET6; mtu_info->ip6m_addr.sin6_port = 0; mtu_info->ip6m_addr.sin6_flowinfo = 0; - mtu_info->ip6m_addr.sin6_scope_id = fl->flowi_oif; + mtu_info->ip6m_addr.sin6_scope_id = fl6->flowi6_oif; 
ipv6_addr_copy(&mtu_info->ip6m_addr.sin6_addr, &ipv6_hdr(skb)->daddr); __skb_pull(skb, skb_tail_pointer(skb) - skb->data); @@ -593,7 +593,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) } int datagram_send_ctl(struct net *net, - struct msghdr *msg, struct flowi *fl, + struct msghdr *msg, struct flowi6 *fl6, struct ipv6_txoptions *opt, int *hlimit, int *tclass, int *dontfrag) { @@ -629,16 +629,17 @@ int datagram_send_ctl(struct net *net, src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg); if (src_info->ipi6_ifindex) { - if (fl->flowi_oif && src_info->ipi6_ifindex != fl->flowi_oif) + if (fl6->flowi6_oif && + src_info->ipi6_ifindex != fl6->flowi6_oif) return -EINVAL; - fl->flowi_oif = src_info->ipi6_ifindex; + fl6->flowi6_oif = src_info->ipi6_ifindex; } addr_type = __ipv6_addr_type(&src_info->ipi6_addr); rcu_read_lock(); - if (fl->flowi_oif) { - dev = dev_get_by_index_rcu(net, fl->flowi_oif); + if (fl6->flowi6_oif) { + dev = dev_get_by_index_rcu(net, fl6->flowi6_oif); if (!dev) { rcu_read_unlock(); return -ENODEV; @@ -654,7 +655,7 @@ int datagram_send_ctl(struct net *net, strict ? 
dev : NULL, 0)) err = -EINVAL; else - ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr); + ipv6_addr_copy(&fl6->saddr, &src_info->ipi6_addr); } rcu_read_unlock(); @@ -671,13 +672,13 @@ int datagram_send_ctl(struct net *net, goto exit_f; } - if (fl->fl6_flowlabel&IPV6_FLOWINFO_MASK) { - if ((fl->fl6_flowlabel^*(__be32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) { + if (fl6->flowlabel&IPV6_FLOWINFO_MASK) { + if ((fl6->flowlabel^*(__be32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) { err = -EINVAL; goto exit_f; } } - fl->fl6_flowlabel = IPV6_FLOWINFO_MASK & *(__be32 *)CMSG_DATA(cmsg); + fl6->flowlabel = IPV6_FLOWINFO_MASK & *(__be32 *)CMSG_DATA(cmsg); break; case IPV6_2292HOPOPTS: diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 262f105..79a485e 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -876,22 +876,22 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, * fl6_update_dst - update flowi destination address with info given * by srcrt option, if any. 
* - * @fl: flowi for which fl6_dst is to be updated + * @fl6: flowi6 for which daddr is to be updated * @opt: struct ipv6_txoptions in which to look for srcrt opt - * @orig: copy of original fl6_dst address if modified + * @orig: copy of original daddr address if modified * * Returns NULL if no txoptions or no srcrt, otherwise returns orig - * and initial value of fl->fl6_dst set in orig + * and initial value of fl6->daddr set in orig */ -struct in6_addr *fl6_update_dst(struct flowi *fl, +struct in6_addr *fl6_update_dst(struct flowi6 *fl6, const struct ipv6_txoptions *opt, struct in6_addr *orig) { if (!opt || !opt->srcrt) return NULL; - ipv6_addr_copy(orig, &fl->fl6_dst); - ipv6_addr_copy(&fl->fl6_dst, ((struct rt0_hdr *)opt->srcrt)->addr); + ipv6_addr_copy(orig, &fl6->daddr); + ipv6_addr_copy(&fl6->daddr, ((struct rt0_hdr *)opt->srcrt)->addr); return orig; } diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index d829874..34d244d 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -29,7 +29,7 @@ struct fib6_rule u8 tclass; }; -struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl, +struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, int flags, pol_lookup_t lookup) { struct fib_lookup_arg arg = { @@ -37,7 +37,8 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl, .flags = FIB_LOOKUP_NOREF, }; - fib_rules_lookup(net->ipv6.fib6_rules_ops, fl, flags, &arg); + fib_rules_lookup(net->ipv6.fib6_rules_ops, + flowi6_to_flowi(fl6), flags, &arg); if (arg.result) return arg.result; @@ -49,6 +50,7 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl, static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, struct fib_lookup_arg *arg) { + struct flowi6 *flp6 = &flp->u.ip6; struct rt6_info *rt = NULL; struct fib6_table *table; struct net *net = rule->fr_net; @@ -71,7 +73,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, table = 
fib6_get_table(net, rule->table); if (table) - rt = lookup(net, table, flp, flags); + rt = lookup(net, table, flp6, flags); if (rt != net->ipv6.ip6_null_entry) { struct fib6_rule *r = (struct fib6_rule *)rule; @@ -86,14 +88,14 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, if (ipv6_dev_get_saddr(net, ip6_dst_idev(&rt->dst)->dev, - &flp->fl6_dst, + &flp6->daddr, rt6_flags2srcprefs(flags), &saddr)) goto again; if (!ipv6_prefix_equal(&saddr, &r->src.addr, r->src.plen)) goto again; - ipv6_addr_copy(&flp->fl6_src, &saddr); + ipv6_addr_copy(&flp6->saddr, &saddr); } goto out; } @@ -113,9 +115,10 @@ out: static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) { struct fib6_rule *r = (struct fib6_rule *) rule; + struct flowi6 *fl6 = &fl->u.ip6; if (r->dst.plen && - !ipv6_prefix_equal(&fl->fl6_dst, &r->dst.addr, r->dst.plen)) + !ipv6_prefix_equal(&fl6->daddr, &r->dst.addr, r->dst.plen)) return 0; /* @@ -125,14 +128,14 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) */ if (r->src.plen) { if (flags & RT6_LOOKUP_F_HAS_SADDR) { - if (!ipv6_prefix_equal(&fl->fl6_src, &r->src.addr, + if (!ipv6_prefix_equal(&fl6->saddr, &r->src.addr, r->src.plen)) return 0; } else if (!(r->common.flags & FIB_RULE_FIND_SADDR)) return 0; } - if (r->tclass && r->tclass != ((ntohl(fl->fl6_flowlabel) >> 20) & 0xff)) + if (r->tclass && r->tclass != ((ntohl(fl6->flowlabel) >> 20) & 0xff)) return 0; return 1; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 52ff7aa..f7b9041 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -158,7 +158,7 @@ static int is_ineligible(struct sk_buff *skb) * Check the ICMP output rate limit */ static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type, - struct flowi *fl) + struct flowi6 *fl6) { struct dst_entry *dst; struct net *net = sock_net(sk); @@ -177,7 +177,7 @@ static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type, * XXX: perhaps the expire for routing entries cloned 
by * this lookup should be more aggressive (not longer than timeout). */ - dst = ip6_route_output(net, sk, fl); + dst = ip6_route_output(net, sk, fl6); if (dst->error) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); @@ -217,7 +217,7 @@ static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset) return (*op & 0xC0) == 0x80; } -static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len) +static int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len) { struct sk_buff *skb; struct icmp6hdr *icmp6h; @@ -233,9 +233,9 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct if (skb_queue_len(&sk->sk_write_queue) == 1) { skb->csum = csum_partial(icmp6h, sizeof(struct icmp6hdr), skb->csum); - icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src, - &fl->fl6_dst, - len, fl->flowi_proto, + icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr, + &fl6->daddr, + len, fl6->flowi6_proto, skb->csum); } else { __wsum tmp_csum = 0; @@ -246,9 +246,9 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct tmp_csum = csum_partial(icmp6h, sizeof(struct icmp6hdr), tmp_csum); - icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src, - &fl->fl6_dst, - len, fl->flowi_proto, + icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr, + &fl6->daddr, + len, fl6->flowi6_proto, tmp_csum); } ip6_push_pending_frames(sk); @@ -301,13 +301,13 @@ static inline void mip6_addr_swap(struct sk_buff *skb) {} #endif static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, - struct sock *sk, struct flowi *fl) + struct sock *sk, struct flowi6 *fl6) { struct dst_entry *dst, *dst2; - struct flowi fl2; + struct flowi6 fl2; int err; - err = ip6_dst_lookup(sk, &dst, fl); + err = ip6_dst_lookup(sk, &dst, fl6); if (err) return ERR_PTR(err); @@ -324,7 +324,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *sk /* 
No need to clone since we're just using its address. */ dst2 = dst; - dst = xfrm_lookup(net, dst, fl, sk, 0); + dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0); if (!IS_ERR(dst)) { if (dst != dst2) return dst; @@ -335,7 +335,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *sk return dst; } - err = xfrm_decode_session_reverse(skb, &fl2, AF_INET6); + err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6); if (err) goto relookup_failed; @@ -343,7 +343,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *sk if (err) goto relookup_failed; - dst2 = xfrm_lookup(net, dst2, &fl2, sk, XFRM_LOOKUP_ICMP); + dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP); if (!IS_ERR(dst2)) { dst_release(dst); dst = dst2; @@ -375,7 +375,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) struct in6_addr *saddr = NULL; struct dst_entry *dst; struct icmp6hdr tmp_hdr; - struct flowi fl; + struct flowi6 fl6; struct icmpv6_msg msg; int iif = 0; int addr_type = 0; @@ -442,22 +442,22 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) mip6_addr_swap(skb); - memset(&fl, 0, sizeof(fl)); - fl.flowi_proto = IPPROTO_ICMPV6; - ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr); + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_ICMPV6; + ipv6_addr_copy(&fl6.daddr, &hdr->saddr); if (saddr) - ipv6_addr_copy(&fl.fl6_src, saddr); - fl.flowi_oif = iif; - fl.fl6_icmp_type = type; - fl.fl6_icmp_code = code; - security_skb_classify_flow(skb, &fl); + ipv6_addr_copy(&fl6.saddr, saddr); + fl6.flowi6_oif = iif; + fl6.uli.icmpt.type = type; + fl6.uli.icmpt.code = code; + security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); if (sk == NULL) return; np = inet6_sk(sk); - if (!icmpv6_xrlim_allow(sk, type, &fl)) + if (!icmpv6_xrlim_allow(sk, type, &fl6)) goto out; tmp_hdr.icmp6_type = type; @@ -465,14 +465,14 @@ void icmpv6_send(struct 
sk_buff *skb, u8 type, u8 code, __u32 info) tmp_hdr.icmp6_cksum = 0; tmp_hdr.icmp6_pointer = htonl(info); - if (!fl.flowi_oif && ipv6_addr_is_multicast(&fl.fl6_dst)) - fl.flowi_oif = np->mcast_oif; + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; - dst = icmpv6_route_lookup(net, skb, sk, &fl); + dst = icmpv6_route_lookup(net, skb, sk, &fl6); if (IS_ERR(dst)) goto out; - if (ipv6_addr_is_multicast(&fl.fl6_dst)) + if (ipv6_addr_is_multicast(&fl6.daddr)) hlimit = np->mcast_hops; else hlimit = np->hop_limit; @@ -495,14 +495,14 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) err = ip6_append_data(sk, icmpv6_getfrag, &msg, len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), hlimit, - np->tclass, NULL, &fl, (struct rt6_info*)dst, + np->tclass, NULL, &fl6, (struct rt6_info*)dst, MSG_DONTWAIT, np->dontfrag); if (err) { ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); goto out_put; } - err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr)); + err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, len + sizeof(struct icmp6hdr)); out_put: if (likely(idev != NULL)) @@ -524,7 +524,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) struct in6_addr *saddr = NULL; struct icmp6hdr *icmph = icmp6_hdr(skb); struct icmp6hdr tmp_hdr; - struct flowi fl; + struct flowi6 fl6; struct icmpv6_msg msg; struct dst_entry *dst; int err = 0; @@ -538,31 +538,31 @@ static void icmpv6_echo_reply(struct sk_buff *skb) memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr)); tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY; - memset(&fl, 0, sizeof(fl)); - fl.flowi_proto = IPPROTO_ICMPV6; - ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_ICMPV6; + ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->saddr); if (saddr) - ipv6_addr_copy(&fl.fl6_src, saddr); - fl.flowi_oif = skb->dev->ifindex; - fl.fl6_icmp_type = ICMPV6_ECHO_REPLY; - 
security_skb_classify_flow(skb, &fl); + ipv6_addr_copy(&fl6.saddr, saddr); + fl6.flowi6_oif = skb->dev->ifindex; + fl6.uli.icmpt.type = ICMPV6_ECHO_REPLY; + security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); if (sk == NULL) return; np = inet6_sk(sk); - if (!fl.flowi_oif && ipv6_addr_is_multicast(&fl.fl6_dst)) - fl.flowi_oif = np->mcast_oif; + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; - err = ip6_dst_lookup(sk, &dst, &fl); + err = ip6_dst_lookup(sk, &dst, &fl6); if (err) goto out; - dst = xfrm_lookup(net, dst, &fl, sk, 0); + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0); if (IS_ERR(dst)) goto out; - if (ipv6_addr_is_multicast(&fl.fl6_dst)) + if (ipv6_addr_is_multicast(&fl6.daddr)) hlimit = np->mcast_hops; else hlimit = np->hop_limit; @@ -576,7 +576,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) msg.type = ICMPV6_ECHO_REPLY; err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), - sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl, + sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6, (struct rt6_info*)dst, MSG_DONTWAIT, np->dontfrag); @@ -585,7 +585,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) ip6_flush_pending_frames(sk); goto out_put; } - err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr)); + err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, skb->len + sizeof(struct icmp6hdr)); out_put: if (likely(idev != NULL)) @@ -784,20 +784,20 @@ drop_no_count: return 0; } -void icmpv6_flow_init(struct sock *sk, struct flowi *fl, +void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6, u8 type, const struct in6_addr *saddr, const struct in6_addr *daddr, int oif) { - memset(fl, 0, sizeof(*fl)); - ipv6_addr_copy(&fl->fl6_src, saddr); - ipv6_addr_copy(&fl->fl6_dst, daddr); - fl->flowi_proto = IPPROTO_ICMPV6; - fl->fl6_icmp_type = type; - fl->fl6_icmp_code = 0; - fl->flowi_oif = oif; - 
security_sk_classify_flow(sk, fl); + memset(fl6, 0, sizeof(*fl6)); + ipv6_addr_copy(&fl6->saddr, saddr); + ipv6_addr_copy(&fl6->daddr, daddr); + fl6->flowi6_proto = IPPROTO_ICMPV6; + fl6->uli.icmpt.type = type; + fl6->uli.icmpt.code = 0; + fl6->flowi6_oif = oif; + security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); } /* diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 1b06a24..27d6691 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -61,20 +61,20 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *final_p, final; struct dst_entry *dst; - struct flowi fl; - - memset(&fl, 0, sizeof(fl)); - fl.flowi_proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); - final_p = fl6_update_dst(&fl, np->opt, &final); - ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); - fl.flowi_oif = sk->sk_bound_dev_if; - fl.flowi_mark = sk->sk_mark; - fl.fl6_dport = inet_rsk(req)->rmt_port; - fl.fl6_sport = inet_rsk(req)->loc_port; - security_req_classify_flow(req, &fl); - - dst = ip6_dst_lookup_flow(sk, &fl, final_p, false); + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_TCP; + ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr); + final_p = fl6_update_dst(&fl6, np->opt, &final); + ipv6_addr_copy(&fl6.saddr, &treq->loc_addr); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_mark = sk->sk_mark; + fl6.uli.ports.dport = inet_rsk(req)->rmt_port; + fl6.uli.ports.sport = inet_rsk(req)->loc_port; + security_req_classify_flow(req, flowi6_to_flowi(&fl6)); + + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); if (IS_ERR(dst)) return NULL; @@ -208,28 +208,28 @@ int inet6_csk_xmit(struct sk_buff *skb) struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct flowi fl; + struct flowi6 fl6; struct dst_entry *dst; struct in6_addr *final_p, final; - memset(&fl, 0, sizeof(fl)); 
- fl.flowi_proto = sk->sk_protocol; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.fl6_flowlabel = np->flow_label; - IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); - fl.flowi_oif = sk->sk_bound_dev_if; - fl.flowi_mark = sk->sk_mark; - fl.fl6_sport = inet->inet_sport; - fl.fl6_dport = inet->inet_dport; - security_sk_classify_flow(sk, &fl); + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = sk->sk_protocol; + ipv6_addr_copy(&fl6.daddr, &np->daddr); + ipv6_addr_copy(&fl6.saddr, &np->saddr); + fl6.flowlabel = np->flow_label; + IP6_ECN_flow_xmit(sk, fl6.flowlabel); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_mark = sk->sk_mark; + fl6.uli.ports.sport = inet->inet_sport; + fl6.uli.ports.dport = inet->inet_dport; + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl, np->opt, &final); + final_p = fl6_update_dst(&fl6, np->opt, &final); dst = __inet6_csk_dst_check(sk, np->dst_cookie); if (dst == NULL) { - dst = ip6_dst_lookup_flow(sk, &fl, final_p, false); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); if (IS_ERR(dst)) { sk->sk_err_soft = -PTR_ERR(dst); @@ -244,9 +244,9 @@ int inet6_csk_xmit(struct sk_buff *skb) skb_dst_set(skb, dst_clone(dst)); /* Restore final destination back after routing done */ - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + ipv6_addr_copy(&fl6.daddr, &np->daddr); - return ip6_xmit(sk, skb, &fl, np->opt); + return ip6_xmit(sk, skb, &fl6, np->opt); } EXPORT_SYMBOL_GPL(inet6_csk_xmit); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index de38211..7548905 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -260,10 +260,10 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) return net->ipv6.fib6_main_tbl; } -struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl, +struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, int flags, pol_lookup_t lookup) { - return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, 
fl, flags); + return (struct dst_entry *) lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); } static void __net_init fib6_tables_init(struct net *net) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index c8fa470..f3caf1b 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -342,7 +342,7 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, if (olen > 0) { struct msghdr msg; - struct flowi flowi; + struct flowi6 flowi6; int junk; err = -ENOMEM; @@ -358,9 +358,9 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, msg.msg_controllen = olen; msg.msg_control = (void*)(fl->opt+1); - flowi.flowi_oif = 0; + memset(&flowi6, 0, sizeof(flowi6)); - err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk, + err = datagram_send_ctl(net, &msg, &flowi6, fl->opt, &junk, &junk, &junk); if (err) goto done; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3d0f2ac..1820887 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -174,15 +174,15 @@ int ip6_output(struct sk_buff *skb) * xmit an sk_buff (used by TCP, SCTP and DCCP) */ -int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, +int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, struct ipv6_txoptions *opt) { struct net *net = sock_net(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct in6_addr *first_hop = &fl->fl6_dst; + struct in6_addr *first_hop = &fl6->daddr; struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr; - u8 proto = fl->flowi_proto; + u8 proto = fl6->flowi6_proto; int seg_len = skb->len; int hlimit = -1; int tclass = 0; @@ -230,13 +230,13 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); - *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel; + *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel; hdr->payload_len = htons(seg_len); hdr->nexthdr = 
proto; hdr->hop_limit = hlimit; - ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); + ipv6_addr_copy(&hdr->saddr, &fl6->saddr); ipv6_addr_copy(&hdr->daddr, first_hop); skb->priority = sk->sk_priority; @@ -879,7 +879,7 @@ static inline int ip6_rt_check(struct rt6key *rt_key, static struct dst_entry *ip6_sk_dst_check(struct sock *sk, struct dst_entry *dst, - struct flowi *fl) + struct flowi6 *fl6) { struct ipv6_pinfo *np = inet6_sk(sk); struct rt6_info *rt = (struct rt6_info *)dst; @@ -904,11 +904,11 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, * sockets. * 2. oif also should be the same. */ - if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) || + if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) || #ifdef CONFIG_IPV6_SUBTREES - ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) || + ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || #endif - (fl->flowi_oif && fl->flowi_oif != dst->dev->ifindex)) { + (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { dst_release(dst); dst = NULL; } @@ -918,22 +918,22 @@ out: } static int ip6_dst_lookup_tail(struct sock *sk, - struct dst_entry **dst, struct flowi *fl) + struct dst_entry **dst, struct flowi6 *fl6) { int err; struct net *net = sock_net(sk); if (*dst == NULL) - *dst = ip6_route_output(net, sk, fl); + *dst = ip6_route_output(net, sk, fl6); if ((err = (*dst)->error)) goto out_err_release; - if (ipv6_addr_any(&fl->fl6_src)) { + if (ipv6_addr_any(&fl6->saddr)) { err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev, - &fl->fl6_dst, + &fl6->daddr, sk ? 
inet6_sk(sk)->srcprefs : 0, - &fl->fl6_src); + &fl6->saddr); if (err) goto out_err_release; } @@ -949,10 +949,10 @@ static int ip6_dst_lookup_tail(struct sock *sk, */ if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) { struct inet6_ifaddr *ifp; - struct flowi fl_gw; + struct flowi6 fl_gw6; int redirect; - ifp = ipv6_get_ifaddr(net, &fl->fl6_src, + ifp = ipv6_get_ifaddr(net, &fl6->saddr, (*dst)->dev, 1); redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); @@ -965,9 +965,9 @@ static int ip6_dst_lookup_tail(struct sock *sk, * default router instead */ dst_release(*dst); - memcpy(&fl_gw, fl, sizeof(struct flowi)); - memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr)); - *dst = ip6_route_output(net, sk, &fl_gw); + memcpy(&fl_gw6, fl6, sizeof(struct flowi6)); + memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr)); + *dst = ip6_route_output(net, sk, &fl_gw6); if ((err = (*dst)->error)) goto out_err_release; } @@ -988,23 +988,23 @@ out_err_release: * ip6_dst_lookup - perform route lookup on flow * @sk: socket which provides route info * @dst: pointer to dst_entry * for result - * @fl: flow to lookup + * @fl6: flow to lookup * * This function performs a route lookup on the given flow. * * It returns zero on success, or a standard errno code on error. */ -int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6) { *dst = NULL; - return ip6_dst_lookup_tail(sk, dst, fl); + return ip6_dst_lookup_tail(sk, dst, fl6); } EXPORT_SYMBOL_GPL(ip6_dst_lookup); /** * ip6_dst_lookup_flow - perform route lookup on flow with ipsec * @sk: socket which provides route info - * @fl: flow to lookup + * @fl6: flow to lookup * @final_dst: final destination address for ipsec lookup * @can_sleep: we are in a sleepable context * @@ -1013,29 +1013,29 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup); * It returns a valid dst pointer on success, or a pointer encoded * error code. 
*/ -struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi *fl, +struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst, bool can_sleep) { struct dst_entry *dst = NULL; int err; - err = ip6_dst_lookup_tail(sk, &dst, fl); + err = ip6_dst_lookup_tail(sk, &dst, fl6); if (err) return ERR_PTR(err); if (final_dst) - ipv6_addr_copy(&fl->fl6_dst, final_dst); + ipv6_addr_copy(&fl6->daddr, final_dst); if (can_sleep) - fl->flowi_flags |= FLOWI_FLAG_CAN_SLEEP; + fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; - return xfrm_lookup(sock_net(sk), dst, fl, sk, 0); + return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); /** * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow * @sk: socket which provides the dst cache and route info - * @fl: flow to lookup + * @fl6: flow to lookup * @final_dst: final destination address for ipsec lookup * @can_sleep: we are in a sleepable context * @@ -1047,24 +1047,24 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); * It returns a valid dst pointer on success, or a pointer encoded * error code. 
*/ -struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi *fl, +struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst, bool can_sleep) { struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); int err; - dst = ip6_sk_dst_check(sk, dst, fl); + dst = ip6_sk_dst_check(sk, dst, fl6); - err = ip6_dst_lookup_tail(sk, &dst, fl); + err = ip6_dst_lookup_tail(sk, &dst, fl6); if (err) return ERR_PTR(err); if (final_dst) - ipv6_addr_copy(&fl->fl6_dst, final_dst); + ipv6_addr_copy(&fl6->daddr, final_dst); if (can_sleep) - fl->flowi_flags |= FLOWI_FLAG_CAN_SLEEP; + fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; - return xfrm_lookup(sock_net(sk), dst, fl, sk, 0); + return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); @@ -1145,7 +1145,7 @@ static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, - int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl, + int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, struct rt6_info *rt, unsigned int flags, int dontfrag) { struct inet_sock *inet = inet_sk(sk); @@ -1203,7 +1203,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, } dst_hold(&rt->dst); inet->cork.dst = &rt->dst; - inet->cork.fl = *fl; + inet->cork.fl.u.ip6 = *fl6; np->cork.hop_limit = hlimit; np->cork.tclass = tclass; mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? 
@@ -1224,7 +1224,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, transhdrlen += exthdrlen; } else { rt = (struct rt6_info *)inet->cork.dst; - fl = &inet->cork.fl; + fl6 = &inet->cork.fl.u.ip6; opt = np->cork.opt; transhdrlen = 0; exthdrlen = 0; @@ -1239,7 +1239,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { - ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen); + ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen); return -EMSGSIZE; } } @@ -1271,7 +1271,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, if (length > mtu) { int proto = sk->sk_protocol; if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){ - ipv6_local_rxpmtu(sk, fl, mtu-exthdrlen); + ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); return -EMSGSIZE; } @@ -1516,8 +1516,8 @@ int ip6_push_pending_frames(struct sock *sk) struct ipv6hdr *hdr; struct ipv6_txoptions *opt = np->cork.opt; struct rt6_info *rt = (struct rt6_info *)inet->cork.dst; - struct flowi *fl = &inet->cork.fl; - unsigned char proto = fl->flowi_proto; + struct flowi6 *fl6 = &inet->cork.fl.u.ip6; + unsigned char proto = fl6->flowi6_proto; int err = 0; if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) @@ -1542,7 +1542,7 @@ int ip6_push_pending_frames(struct sock *sk) if (np->pmtudisc < IPV6_PMTUDISC_DO) skb->local_df = 1; - ipv6_addr_copy(final_dst, &fl->fl6_dst); + ipv6_addr_copy(final_dst, &fl6->daddr); __skb_pull(skb, skb_network_header_len(skb)); if (opt && opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); @@ -1553,12 +1553,12 @@ int ip6_push_pending_frames(struct sock *sk) skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - *(__be32*)hdr = fl->fl6_flowlabel | + *(__be32*)hdr = fl6->flowlabel | htonl(0x60000000 | ((int)np->cork.tclass << 20)); hdr->hop_limit = np->cork.hop_limit; hdr->nexthdr = proto; - 
ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); + ipv6_addr_copy(&hdr->saddr, &fl6->saddr); ipv6_addr_copy(&hdr->daddr, final_dst); skb->priority = sk->sk_priority; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index c3fc824..c1b1bd3 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -884,7 +884,7 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) static int ip6_tnl_xmit2(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, - struct flowi *fl, + struct flowi6 *fl6, int encap_limit, __u32 *pmtu) { @@ -904,11 +904,11 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, if ((dst = ip6_tnl_dst_check(t)) != NULL) dst_hold(dst); else { - dst = ip6_route_output(net, NULL, fl); + dst = ip6_route_output(net, NULL, fl6); if (dst->error) goto tx_err_link_failure; - dst = xfrm_lookup(net, dst, fl, NULL, 0); + dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), NULL, 0); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; @@ -963,7 +963,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb->transport_header = skb->network_header; - proto = fl->flowi_proto; + proto = fl6->flowi6_proto; if (encap_limit >= 0) { init_tel_txopt(&opt, encap_limit); ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); @@ -971,13 +971,13 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); - *(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000); + *(__be32*)ipv6h = fl6->flowlabel | htonl(0x60000000); dsfield = INET_ECN_encapsulate(0, dsfield); ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = proto; - ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src); - ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst); + ipv6_addr_copy(&ipv6h->saddr, &fl6->saddr); + ipv6_addr_copy(&ipv6h->daddr, &fl6->daddr); nf_reset(skb); pkt_len = skb->len; err = ip6_local_out(skb); @@ -1007,7 +1007,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) 
struct ip6_tnl *t = netdev_priv(dev); struct iphdr *iph = ip_hdr(skb); int encap_limit = -1; - struct flowi fl; + struct flowi6 fl6; __u8 dsfield; __u32 mtu; int err; @@ -1019,16 +1019,16 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; - memcpy(&fl, &t->fl, sizeof (fl)); - fl.flowi_proto = IPPROTO_IPIP; + memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6)); + fl6.flowi6_proto = IPPROTO_IPIP; dsfield = ipv4_get_dsfield(iph); if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) - fl.fl6_flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) + fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) & IPV6_TCLASS_MASK; - err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu); + err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); if (err != 0) { /* XXX: send ICMP error even if DF is not set. */ if (err == -EMSGSIZE) @@ -1047,7 +1047,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) struct ipv6hdr *ipv6h = ipv6_hdr(skb); int encap_limit = -1; __u16 offset; - struct flowi fl; + struct flowi6 fl6; __u8 dsfield; __u32 mtu; int err; @@ -1069,16 +1069,16 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; - memcpy(&fl, &t->fl, sizeof (fl)); - fl.flowi_proto = IPPROTO_IPV6; + memcpy(&fl6, &t->fl.u.ip6, sizeof (fl6)); + fl6.flowi6_proto = IPPROTO_IPV6; dsfield = ipv6_get_dsfield(ipv6h); if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) - fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); + fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)) - fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); + fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); - err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu); + err = ip6_tnl_xmit2(skb, dev, dsfield, &fl6, 
encap_limit, &mtu); if (err != 0) { if (err == -EMSGSIZE) icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); @@ -1141,21 +1141,21 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) { struct net_device *dev = t->dev; struct ip6_tnl_parm *p = &t->parms; - struct flowi *fl = &t->fl; + struct flowi6 *fl6 = &t->fl.u.ip6; memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); /* Set up flowi template */ - ipv6_addr_copy(&fl->fl6_src, &p->laddr); - ipv6_addr_copy(&fl->fl6_dst, &p->raddr); - fl->flowi_oif = p->link; - fl->fl6_flowlabel = 0; + ipv6_addr_copy(&fl6->saddr, &p->laddr); + ipv6_addr_copy(&fl6->daddr, &p->raddr); + fl6->flowi6_oif = p->link; + fl6->flowlabel = 0; if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) - fl->fl6_flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; + fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL)) - fl->fl6_flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo; + fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo; ip6_tnl_set_cap(t); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 61a8be3..7ff0343 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -135,14 +135,15 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) return NULL; } -static int ip6mr_fib_lookup(struct net *net, struct flowi *flp, +static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, struct mr6_table **mrt) { struct ip6mr_result res; struct fib_lookup_arg arg = { .result = &res, }; int err; - err = fib_rules_lookup(net->ipv6.mr6_rules_ops, flp, 0, &arg); + err = fib_rules_lookup(net->ipv6.mr6_rules_ops, + flowi6_to_flowi(flp6), 0, &arg); if (err < 0) return err; *mrt = res.mrt; @@ -270,7 +271,7 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) return net->ipv6.mrt6; } -static int ip6mr_fib_lookup(struct net *net, struct flowi *flp, +static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, struct mr6_table **mrt) { *mrt = 
net->ipv6.mrt6; @@ -617,9 +618,9 @@ static int pim6_rcv(struct sk_buff *skb) struct net_device *reg_dev = NULL; struct net *net = dev_net(skb->dev); struct mr6_table *mrt; - struct flowi fl = { - .flowi_iif = skb->dev->ifindex, - .flowi_mark = skb->mark, + struct flowi6 fl6 = { + .flowi6_iif = skb->dev->ifindex, + .flowi6_mark = skb->mark, }; int reg_vif_num; @@ -644,7 +645,7 @@ static int pim6_rcv(struct sk_buff *skb) ntohs(encap->payload_len) + sizeof(*pim) > skb->len) goto drop; - if (ip6mr_fib_lookup(net, &fl, &mrt) < 0) + if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) goto drop; reg_vif_num = mrt->mroute_reg_vif_num; @@ -687,14 +688,14 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, { struct net *net = dev_net(dev); struct mr6_table *mrt; - struct flowi fl = { - .flowi_oif = dev->ifindex, - .flowi_iif = skb->skb_iif, - .flowi_mark = skb->mark, + struct flowi6 fl6 = { + .flowi6_oif = dev->ifindex, + .flowi6_iif = skb->skb_iif, + .flowi6_mark = skb->mark, }; int err; - err = ip6mr_fib_lookup(net, &fl, &mrt); + err = ip6mr_fib_lookup(net, &fl6, &mrt); if (err < 0) return err; @@ -1547,13 +1548,13 @@ int ip6mr_sk_done(struct sock *sk) struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) { struct mr6_table *mrt; - struct flowi fl = { - .flowi_iif = skb->skb_iif, - .flowi_oif = skb->dev->ifindex, - .flowi_mark= skb->mark, + struct flowi6 fl6 = { + .flowi6_iif = skb->skb_iif, + .flowi6_oif = skb->dev->ifindex, + .flowi6_mark = skb->mark, }; - if (ip6mr_fib_lookup(net, &fl, &mrt) < 0) + if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) return NULL; return mrt->mroute6_sk; @@ -1897,7 +1898,7 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, struct mif_device *vif = &mrt->vif6_table[vifi]; struct net_device *dev; struct dst_entry *dst; - struct flowi fl; + struct flowi6 fl6; if (vif->dev == NULL) goto out_free; @@ -1915,12 +1916,12 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, ipv6h = ipv6_hdr(skb); - fl = (struct 
flowi) { - .flowi_oif = vif->link, - .fl6_dst = ipv6h->daddr, + fl6 = (struct flowi6) { + .flowi6_oif = vif->link, + .daddr = ipv6h->daddr, }; - dst = ip6_route_output(net, NULL, &fl); + dst = ip6_route_output(net, NULL, &fl6); if (!dst) goto out_free; @@ -2043,13 +2044,13 @@ int ip6_mr_input(struct sk_buff *skb) struct mfc6_cache *cache; struct net *net = dev_net(skb->dev); struct mr6_table *mrt; - struct flowi fl = { - .flowi_iif = skb->dev->ifindex, - .flowi_mark= skb->mark, + struct flowi6 fl6 = { + .flowi6_iif = skb->dev->ifindex, + .flowi6_mark = skb->mark, }; int err; - err = ip6mr_fib_lookup(net, &fl, &mrt); + err = ip6mr_fib_lookup(net, &fl6, &mrt); if (err < 0) return err; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 1448c50..9cb191e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -444,12 +444,12 @@ sticky_done: { struct ipv6_txoptions *opt = NULL; struct msghdr msg; - struct flowi fl; + struct flowi6 fl6; int junk; - fl.fl6_flowlabel = 0; - fl.flowi_oif = sk->sk_bound_dev_if; - fl.flowi_mark = sk->sk_mark; + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_mark = sk->sk_mark; if (optlen == 0) goto update; @@ -475,7 +475,7 @@ sticky_done: msg.msg_controllen = optlen; msg.msg_control = (void*)(opt+1); - retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk, + retv = datagram_send_ctl(net, &msg, &fl6, opt, &junk, &junk, &junk); if (retv) goto done; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index f2c9b69..76b8937 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1396,7 +1396,7 @@ static void mld_sendpack(struct sk_buff *skb) struct inet6_dev *idev; struct net *net = dev_net(skb->dev); int err; - struct flowi fl; + struct flowi6 fl6; struct dst_entry *dst; rcu_read_lock(); @@ -1419,11 +1419,11 @@ static void mld_sendpack(struct sk_buff *skb) goto err_out; } - icmpv6_flow_init(net->ipv6.igmp_sk, &fl, ICMPV6_MLD2_REPORT, + icmpv6_flow_init(net->ipv6.igmp_sk, 
&fl6, ICMPV6_MLD2_REPORT, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - dst = xfrm_lookup(net, dst, &fl, NULL, 0); + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); err = 0; if (IS_ERR(dst)) { err = PTR_ERR(dst); @@ -1731,7 +1731,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) u8 ra[8] = { IPPROTO_ICMPV6, 0, IPV6_TLV_ROUTERALERT, 2, 0, 0, IPV6_TLV_PADN, 0 }; - struct flowi fl; + struct flowi6 fl6; struct dst_entry *dst; if (type == ICMPV6_MGM_REDUCTION) @@ -1791,11 +1791,11 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) goto err_out; } - icmpv6_flow_init(sk, &fl, type, + icmpv6_flow_init(sk, &fl6, type, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - dst = xfrm_lookup(net, dst, &fl, NULL, 0); + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto err_out; diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index e1767ae..6a13735 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -208,14 +208,15 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, { struct net *net = xs_net(x); struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb; + const struct flowi6 *fl6 = &fl->u.ip6; struct ipv6_destopt_hao *hao = NULL; struct xfrm_selector sel; int offset; struct timeval stamp; int err = 0; - if (unlikely(fl->flowi_proto == IPPROTO_MH && - fl->fl6_mh_type <= IP6_MH_TYPE_MAX)) + if (unlikely(fl6->flowi6_proto == IPPROTO_MH && + fl6->uli.mht.type <= IP6_MH_TYPE_MAX)) goto out; if (likely(opt->dsthao)) { @@ -240,14 +241,14 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, sizeof(sel.saddr)); sel.prefixlen_s = 128; sel.family = AF_INET6; - sel.proto = fl->flowi_proto; - sel.dport = xfrm_flowi_dport(fl, &fl->u.ip6.uli); + sel.proto = fl6->flowi6_proto; + sel.dport = xfrm_flowi_dport(fl, &fl6->uli); if (sel.dport) sel.dport_mask = htons(~0); - 
sel.sport = xfrm_flowi_sport(fl, &fl->u.ip6.uli); + sel.sport = xfrm_flowi_sport(fl, &fl6->uli); if (sel.sport) sel.sport_mask = htons(~0); - sel.ifindex = fl->flowi_oif; + sel.ifindex = fl6->flowi6_oif; err = km_report(net, IPPROTO_DSTOPTS, &sel, (hao ? (xfrm_address_t *)&hao->addr : NULL)); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 9360d3b..0e49c9d 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -511,7 +511,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *saddr, struct icmp6hdr *icmp6h) { - struct flowi fl; + struct flowi6 fl6; struct dst_entry *dst; struct net *net = dev_net(dev); struct sock *sk = net->ipv6.ndisc_sk; @@ -521,7 +521,7 @@ void ndisc_send_skb(struct sk_buff *skb, type = icmp6h->icmp6_type; - icmpv6_flow_init(sk, &fl, type, saddr, daddr, dev->ifindex); + icmpv6_flow_init(sk, &fl6, type, saddr, daddr, dev->ifindex); dst = icmp6_dst_alloc(dev, neigh, daddr); if (!dst) { @@ -529,7 +529,7 @@ void ndisc_send_skb(struct sk_buff *skb, return; } - dst = xfrm_lookup(net, dst, &fl, NULL, 0); + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); if (IS_ERR(dst)) { kfree_skb(skb); return; @@ -1515,7 +1515,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, struct rt6_info *rt; struct dst_entry *dst; struct inet6_dev *idev; - struct flowi fl; + struct flowi6 fl6; u8 *opt; int rd_len; int err; @@ -1535,14 +1535,14 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, return; } - icmpv6_flow_init(sk, &fl, NDISC_REDIRECT, + icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT, &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex); - dst = ip6_route_output(net, NULL, &fl); + dst = ip6_route_output(net, NULL, &fl6); if (dst == NULL) return; - dst = xfrm_lookup(net, dst, &fl, NULL, 0); + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); if (IS_ERR(dst)) return; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index d282c62..39aaca2 100644 --- a/net/ipv6/netfilter.c +++ 
b/net/ipv6/netfilter.c @@ -15,14 +15,14 @@ int ip6_route_me_harder(struct sk_buff *skb) struct net *net = dev_net(skb_dst(skb)->dev); struct ipv6hdr *iph = ipv6_hdr(skb); struct dst_entry *dst; - struct flowi fl = { - .flowi_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, - .flowi_mark = skb->mark, - .fl6_dst = iph->daddr, - .fl6_src = iph->saddr, + struct flowi6 fl6 = { + .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, + .flowi6_mark = skb->mark, + .daddr = iph->daddr, + .saddr = iph->saddr, }; - dst = ip6_route_output(net, skb->sk, &fl); + dst = ip6_route_output(net, skb->sk, &fl6); if (dst->error) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); @@ -37,9 +37,9 @@ int ip6_route_me_harder(struct sk_buff *skb) #ifdef CONFIG_XFRM if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && - xfrm_decode_session(skb, &fl, AF_INET6) == 0) { + xfrm_decode_session(skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) { skb_dst_set(skb, NULL); - dst = xfrm_lookup(net, dst, &fl, skb->sk, 0); + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), skb->sk, 0); if (IS_ERR(dst)) return -1; skb_dst_set(skb, dst); @@ -92,7 +92,7 @@ static int nf_ip6_reroute(struct sk_buff *skb, static int nf_ip6_route(struct dst_entry **dst, struct flowi *fl) { - *dst = ip6_route_output(&init_net, NULL, fl); + *dst = ip6_route_output(&init_net, NULL, &fl->u.ip6); return (*dst)->error; } diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index d1e905b..df05511 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -47,7 +47,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) struct ipv6hdr *ip6h; struct dst_entry *dst = NULL; u8 proto; - struct flowi fl; + struct flowi6 fl6; if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { @@ -89,19 +89,19 @@ static void send_reset(struct net 
*net, struct sk_buff *oldskb) return; } - memset(&fl, 0, sizeof(fl)); - fl.flowi_proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_src, &oip6h->daddr); - ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr); - fl.fl6_sport = otcph.dest; - fl.fl6_dport = otcph.source; - security_skb_classify_flow(oldskb, &fl); - dst = ip6_route_output(net, NULL, &fl); + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_TCP; + ipv6_addr_copy(&fl6.saddr, &oip6h->daddr); + ipv6_addr_copy(&fl6.daddr, &oip6h->saddr); + fl6.uli.ports.sport = otcph.dest; + fl6.uli.ports.dport = otcph.source; + security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); + dst = ip6_route_output(net, NULL, &fl6); if (dst == NULL || dst->error) { dst_release(dst); return; } - dst = xfrm_lookup(net, dst, &fl, NULL, 0); + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); if (IS_ERR(dst)) return; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index d061465..259f1b2 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -524,7 +524,7 @@ csum_copy_err: goto out; } -static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, +static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct raw6_sock *rp) { struct sk_buff *skb; @@ -586,11 +586,10 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, if (unlikely(csum)) tmp_csum = csum_sub(tmp_csum, csum_unfold(csum)); - csum = csum_ipv6_magic(&fl->fl6_src, - &fl->fl6_dst, - total_len, fl->flowi_proto, tmp_csum); + csum = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, + total_len, fl6->flowi6_proto, tmp_csum); - if (csum == 0 && fl->flowi_proto == IPPROTO_UDP) + if (csum == 0 && fl6->flowi6_proto == IPPROTO_UDP) csum = CSUM_MANGLED_0; if (skb_store_bits(skb, offset, &csum, 2)) @@ -603,7 +602,7 @@ out: } static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, - struct flowi *fl, struct dst_entry **dstp, + struct flowi6 *fl6, struct dst_entry **dstp, unsigned int flags) { struct ipv6_pinfo *np = inet6_sk(sk); @@ 
-613,7 +612,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, struct rt6_info *rt = (struct rt6_info *)*dstp; if (length > rt->dst.dev->mtu) { - ipv6_local_error(sk, EMSGSIZE, fl, rt->dst.dev->mtu); + ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu); return -EMSGSIZE; } if (flags&MSG_PROBE) @@ -662,7 +661,7 @@ error: return err; } -static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) +static int rawv6_probe_proto_opt(struct flowi6 *fl6, struct msghdr *msg) { struct iovec *iov; u8 __user *type = NULL; @@ -679,7 +678,7 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) if (!iov) continue; - switch (fl->flowi_proto) { + switch (fl6->flowi6_proto) { case IPPROTO_ICMPV6: /* check if one-byte field is readable or not. */ if (iov->iov_base && iov->iov_len < 1) @@ -694,8 +693,8 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) code = iov->iov_base; if (type && code) { - if (get_user(fl->fl6_icmp_type, type) || - get_user(fl->fl6_icmp_code, code)) + if (get_user(fl6->uli.icmpt.type, type) || + get_user(fl6->uli.icmpt.code, code)) return -EFAULT; probed = 1; } @@ -706,7 +705,7 @@ static int rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) /* check if type field is readable or not. */ if (iov->iov_len > 2 - len) { u8 __user *p = iov->iov_base; - if (get_user(fl->fl6_mh_type, &p[2 - len])) + if (get_user(fl6->uli.mht.type, &p[2 - len])) return -EFAULT; probed = 1; } else @@ -735,7 +734,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; struct dst_entry *dst = NULL; - struct flowi fl; + struct flowi6 fl6; int addr_len = msg->msg_namelen; int hlimit = -1; int tclass = -1; @@ -756,9 +755,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, /* * Get and verify the address. 
*/ - memset(&fl, 0, sizeof(fl)); + memset(&fl6, 0, sizeof(fl6)); - fl.flowi_mark = sk->sk_mark; + fl6.flowi6_mark = sk->sk_mark; if (sin6) { if (addr_len < SIN6_LEN_RFC2133) @@ -780,9 +779,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, daddr = &sin6->sin6_addr; if (np->sndflow) { - fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; - if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; + if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { + flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; daddr = &flowlabel->dst; @@ -800,32 +799,32 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) - fl.flowi_oif = sin6->sin6_scope_id; + fl6.flowi6_oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; proto = inet->inet_num; daddr = &np->daddr; - fl.fl6_flowlabel = np->flow_label; + fl6.flowlabel = np->flow_label; } - if (fl.flowi_oif == 0) - fl.flowi_oif = sk->sk_bound_dev_if; + if (fl6.flowi6_oif == 0) + fl6.flowi6_oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { opt = &opt_space; memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, + err = datagram_send_ctl(sock_net(sk), msg, &fl6, opt, &hlimit, &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; } - if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { + flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; } @@ -838,31 +837,31 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, opt = fl6_merge_options(&opt_space, 
flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); - fl.flowi_proto = proto; - err = rawv6_probe_proto_opt(&fl, msg); + fl6.flowi6_proto = proto; + err = rawv6_probe_proto_opt(&fl6, msg); if (err) goto out; if (!ipv6_addr_any(daddr)) - ipv6_addr_copy(&fl.fl6_dst, daddr); + ipv6_addr_copy(&fl6.daddr, daddr); else - fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ - if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) - ipv6_addr_copy(&fl.fl6_src, &np->saddr); + fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ + if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr)) + ipv6_addr_copy(&fl6.saddr, &np->saddr); - final_p = fl6_update_dst(&fl, opt, &final); + final_p = fl6_update_dst(&fl6, opt, &final); - if (!fl.flowi_oif && ipv6_addr_is_multicast(&fl.fl6_dst)) - fl.flowi_oif = np->mcast_oif; - security_sk_classify_flow(sk, &fl); + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - dst = ip6_dst_lookup_flow(sk, &fl, final_p, true); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto out; } if (hlimit < 0) { - if (ipv6_addr_is_multicast(&fl.fl6_dst)) + if (ipv6_addr_is_multicast(&fl6.daddr)) hlimit = np->mcast_hops; else hlimit = np->hop_limit; @@ -881,17 +880,17 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, back_from_confirm: if (inet->hdrincl) - err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, &dst, msg->msg_flags); + err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl6, &dst, msg->msg_flags); else { lock_sock(sk); err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, - len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst, + len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info*)dst, msg->msg_flags, dontfrag); if (err) ip6_flush_pending_frames(sk); else if (!(msg->msg_flags & MSG_MORE)) - err = rawv6_push_pending_frames(sk, &fl, rp); + err = 
rawv6_push_pending_frames(sk, &fl6, rp); release_sock(sk); } done: diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c3b20d6..6814c87 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -599,17 +599,17 @@ do { \ static struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_table *table, - struct flowi *fl, int flags) + struct flowi6 *fl6, int flags) { struct fib6_node *fn; struct rt6_info *rt; read_lock_bh(&table->tb6_lock); - fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); + fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: rt = fn->leaf; - rt = rt6_device_match(net, rt, &fl->fl6_src, fl->flowi_oif, flags); - BACKTRACK(net, &fl->fl6_src); + rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); + BACKTRACK(net, &fl6->saddr); out: dst_use(&rt->dst, jiffies); read_unlock_bh(&table->tb6_lock); @@ -620,19 +620,19 @@ out: struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, int strict) { - struct flowi fl = { - .flowi_oif = oif, - .fl6_dst = *daddr, + struct flowi6 fl6 = { + .flowi6_oif = oif, + .daddr = *daddr, }; struct dst_entry *dst; int flags = strict ? 
RT6_LOOKUP_F_IFACE : 0; if (saddr) { - memcpy(&fl.fl6_src, saddr, sizeof(*saddr)); + memcpy(&fl6.saddr, saddr, sizeof(*saddr)); flags |= RT6_LOOKUP_F_HAS_SADDR; } - dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup); + dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup); if (dst->error == 0) return (struct rt6_info *) dst; @@ -753,7 +753,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d } static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, - struct flowi *fl, int flags) + struct flowi6 *fl6, int flags) { struct fib6_node *fn; struct rt6_info *rt, *nrt; @@ -768,12 +768,12 @@ relookup: read_lock_bh(&table->tb6_lock); restart_2: - fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); + fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: rt = rt6_select(fn, oif, strict | reachable); - BACKTRACK(net, &fl->fl6_src); + BACKTRACK(net, &fl6->saddr); if (rt == net->ipv6.ip6_null_entry || rt->rt6i_flags & RTF_CACHE) goto out; @@ -782,9 +782,9 @@ restart: read_unlock_bh(&table->tb6_lock); if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) - nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); + nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); else if (!(rt->dst.flags & DST_HOST)) - nrt = rt6_alloc_clone(rt, &fl->fl6_dst); + nrt = rt6_alloc_clone(rt, &fl6->daddr); else goto out2; @@ -823,9 +823,9 @@ out2: } static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, - struct flowi *fl, int flags) + struct flowi6 *fl6, int flags) { - return ip6_pol_route(net, table, fl->flowi_iif, fl, flags); + return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags); } void ip6_route_input(struct sk_buff *skb) @@ -833,41 +833,41 @@ void ip6_route_input(struct sk_buff *skb) struct ipv6hdr *iph = ipv6_hdr(skb); struct net *net = dev_net(skb->dev); int flags = RT6_LOOKUP_F_HAS_SADDR; - struct flowi fl = { - .flowi_iif = 
skb->dev->ifindex, - .fl6_dst = iph->daddr, - .fl6_src = iph->saddr, - .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, - .flowi_mark = skb->mark, - .flowi_proto = iph->nexthdr, + struct flowi6 fl6 = { + .flowi6_iif = skb->dev->ifindex, + .daddr = iph->daddr, + .saddr = iph->saddr, + .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, + .flowi6_mark = skb->mark, + .flowi6_proto = iph->nexthdr, }; if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG) flags |= RT6_LOOKUP_F_IFACE; - skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input)); + skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input)); } static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, - struct flowi *fl, int flags) + struct flowi6 *fl6, int flags) { - return ip6_pol_route(net, table, fl->flowi_oif, fl, flags); + return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags); } struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, - struct flowi *fl) + struct flowi6 *fl6) { int flags = 0; - if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst)) + if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr)) flags |= RT6_LOOKUP_F_IFACE; - if (!ipv6_addr_any(&fl->fl6_src)) + if (!ipv6_addr_any(&fl6->saddr)) flags |= RT6_LOOKUP_F_HAS_SADDR; else if (sk) flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); - return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output); + return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output); } EXPORT_SYMBOL(ip6_route_output); @@ -1444,16 +1444,16 @@ static int ip6_route_del(struct fib6_config *cfg) * Handle redirects */ struct ip6rd_flowi { - struct flowi fl; + struct flowi6 fl6; struct in6_addr gateway; }; static struct rt6_info *__ip6_route_redirect(struct net *net, struct fib6_table *table, - struct flowi *fl, + struct flowi6 *fl6, int flags) { - struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl; + struct ip6rd_flowi *rdfl = (struct 
ip6rd_flowi *)fl6; struct rt6_info *rt; struct fib6_node *fn; @@ -1469,7 +1469,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, */ read_lock_bh(&table->tb6_lock); - fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); + fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { /* @@ -1484,7 +1484,7 @@ restart: continue; if (!(rt->rt6i_flags & RTF_GATEWAY)) continue; - if (fl->flowi_oif != rt->rt6i_dev->ifindex) + if (fl6->flowi6_oif != rt->rt6i_dev->ifindex) continue; if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) continue; @@ -1493,7 +1493,7 @@ restart: if (!rt) rt = net->ipv6.ip6_null_entry; - BACKTRACK(net, &fl->fl6_src); + BACKTRACK(net, &fl6->saddr); out: dst_hold(&rt->dst); @@ -1510,10 +1510,10 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, int flags = RT6_LOOKUP_F_HAS_SADDR; struct net *net = dev_net(dev); struct ip6rd_flowi rdfl = { - .fl = { - .flowi_oif = dev->ifindex, - .fl6_dst = *dest, - .fl6_src = *src, + .fl6 = { + .flowi6_oif = dev->ifindex, + .daddr = *dest, + .saddr = *src, }, }; @@ -1522,7 +1522,7 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, if (rt6_need_strict(dest)) flags |= RT6_LOOKUP_F_IFACE; - return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl, + return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6, flags, __ip6_route_redirect); } @@ -2385,7 +2385,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void struct rt6_info *rt; struct sk_buff *skb; struct rtmsg *rtm; - struct flowi fl; + struct flowi6 fl6; int err, iif = 0; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); @@ -2393,27 +2393,27 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void goto errout; err = -EINVAL; - memset(&fl, 0, sizeof(fl)); + memset(&fl6, 0, sizeof(fl6)); if (tb[RTA_SRC]) { if (nla_len(tb[RTA_SRC]) < sizeof(struct 
in6_addr)) goto errout; - ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC])); + ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC])); } if (tb[RTA_DST]) { if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) goto errout; - ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST])); + ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST])); } if (tb[RTA_IIF]) iif = nla_get_u32(tb[RTA_IIF]); if (tb[RTA_OIF]) - fl.flowi_oif = nla_get_u32(tb[RTA_OIF]); + fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]); if (iif) { struct net_device *dev; @@ -2436,10 +2436,10 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb_reset_mac_header(skb); skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); - rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl); + rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6); skb_dst_set(skb, &rt->dst); - err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, + err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, RTM_NEWROUTE, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 0, 0, 0); if (err < 0) { diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 5b9eded..97858d5 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -232,19 +232,19 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) */ { struct in6_addr *final_p, final; - struct flowi fl; - memset(&fl, 0, sizeof(fl)); - fl.flowi_proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); - final_p = fl6_update_dst(&fl, np->opt, &final); - ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); - fl.flowi_oif = sk->sk_bound_dev_if; - fl.flowi_mark = sk->sk_mark; - fl.fl6_dport = inet_rsk(req)->rmt_port; - fl.fl6_sport = inet_sk(sk)->inet_sport; - security_req_classify_flow(req, &fl); - - dst = ip6_dst_lookup_flow(sk, &fl, final_p, false); + struct flowi6 fl6; + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_TCP; + ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr); + final_p = fl6_update_dst(&fl6, np->opt, &final); + 
ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_mark = sk->sk_mark; + fl6.uli.ports.dport = inet_rsk(req)->rmt_port; + fl6.uli.ports.sport = inet_sk(sk)->inet_sport; + security_req_classify_flow(req, flowi6_to_flowi(&fl6)); + + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); if (IS_ERR(dst)) goto out_free; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c531ad5..7ed0ba1 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -131,7 +131,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; struct rt6_info *rt; - struct flowi fl; + struct flowi6 fl6; struct dst_entry *dst; int addr_type; int err; @@ -142,14 +142,14 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; - memset(&fl, 0, sizeof(fl)); + memset(&fl6, 0, sizeof(fl6)); if (np->sndflow) { - fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; - IP6_ECN_flow_init(fl.fl6_flowlabel); - if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { + fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; + IP6_ECN_flow_init(fl6.flowlabel); + if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { struct ip6_flowlabel *flowlabel; - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); @@ -195,7 +195,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, } ipv6_addr_copy(&np->daddr, &usin->sin6_addr); - np->flow_label = fl.fl6_flowlabel; + np->flow_label = fl6.flowlabel; /* * TCP over IPv4 @@ -242,27 +242,27 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (!ipv6_addr_any(&np->rcv_saddr)) saddr = &np->rcv_saddr; - fl.flowi_proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, + 
fl6.flowi6_proto = IPPROTO_TCP; + ipv6_addr_copy(&fl6.daddr, &np->daddr); + ipv6_addr_copy(&fl6.saddr, (saddr ? saddr : &np->saddr)); - fl.flowi_oif = sk->sk_bound_dev_if; - fl.flowi_mark = sk->sk_mark; - fl.fl6_dport = usin->sin6_port; - fl.fl6_sport = inet->inet_sport; + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_mark = sk->sk_mark; + fl6.uli.ports.dport = usin->sin6_port; + fl6.uli.ports.sport = inet->inet_sport; - final_p = fl6_update_dst(&fl, np->opt, &final); + final_p = fl6_update_dst(&fl6, np->opt, &final); - security_sk_classify_flow(sk, &fl); + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - dst = ip6_dst_lookup_flow(sk, &fl, final_p, true); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto failure; } if (saddr == NULL) { - saddr = &fl.fl6_src; + saddr = &fl6.saddr; ipv6_addr_copy(&np->rcv_saddr, saddr); } @@ -389,23 +389,23 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (dst == NULL) { struct inet_sock *inet = inet_sk(sk); - struct flowi fl; + struct flowi6 fl6; /* BUGGG_FUTURE: Again, it is not clear how to handle rthdr case. Ignore this complexity for now. 
*/ - memset(&fl, 0, sizeof(fl)); - fl.flowi_proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.flowi_oif = sk->sk_bound_dev_if; - fl.flowi_mark = sk->sk_mark; - fl.fl6_dport = inet->inet_dport; - fl.fl6_sport = inet->inet_sport; - security_skb_classify_flow(skb, &fl); - - dst = ip6_dst_lookup_flow(sk, &fl, NULL, false); + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_TCP; + ipv6_addr_copy(&fl6.daddr, &np->daddr); + ipv6_addr_copy(&fl6.saddr, &np->saddr); + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.flowi6_mark = sk->sk_mark; + fl6.uli.ports.dport = inet->inet_dport; + fl6.uli.ports.sport = inet->inet_sport; + security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); + + dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false); if (IS_ERR(dst)) { sk->sk_err_soft = -PTR_ERR(dst); goto out; @@ -482,25 +482,25 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, struct sk_buff * skb; struct ipv6_txoptions *opt = NULL; struct in6_addr * final_p, final; - struct flowi fl; + struct flowi6 fl6; struct dst_entry *dst; int err; - memset(&fl, 0, sizeof(fl)); - fl.flowi_proto = IPPROTO_TCP; - ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); - ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); - fl.fl6_flowlabel = 0; - fl.flowi_oif = treq->iif; - fl.flowi_mark = sk->sk_mark; - fl.fl6_dport = inet_rsk(req)->rmt_port; - fl.fl6_sport = inet_rsk(req)->loc_port; - security_req_classify_flow(req, &fl); + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_TCP; + ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr); + ipv6_addr_copy(&fl6.saddr, &treq->loc_addr); + fl6.flowlabel = 0; + fl6.flowi6_oif = treq->iif; + fl6.flowi6_mark = sk->sk_mark; + fl6.uli.ports.dport = inet_rsk(req)->rmt_port; + fl6.uli.ports.sport = inet_rsk(req)->loc_port; + security_req_classify_flow(req, flowi6_to_flowi(&fl6)); opt = np->opt; - final_p = fl6_update_dst(&fl, opt, &final); + final_p = fl6_update_dst(&fl6, opt, &final); - 
dst = ip6_dst_lookup_flow(sk, &fl, final_p, false); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto done; @@ -510,8 +510,8 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, if (skb) { __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); - ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); - err = ip6_xmit(sk, skb, &fl, opt); + ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr); + err = ip6_xmit(sk, skb, &fl6, opt); err = net_xmit_eval(err); } @@ -992,7 +992,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, { struct tcphdr *th = tcp_hdr(skb), *t1; struct sk_buff *buff; - struct flowi fl; + struct flowi6 fl6; struct net *net = dev_net(skb_dst(skb)->dev); struct sock *ctl_sk = net->ipv6.tcp_sk; unsigned int tot_len = sizeof(struct tcphdr); @@ -1046,29 +1046,29 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, } #endif - memset(&fl, 0, sizeof(fl)); - ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); - ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr); + memset(&fl6, 0, sizeof(fl6)); + ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->saddr); + ipv6_addr_copy(&fl6.saddr, &ipv6_hdr(skb)->daddr); buff->ip_summed = CHECKSUM_PARTIAL; buff->csum = 0; - __tcp_v6_send_check(buff, &fl.fl6_src, &fl.fl6_dst); + __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); - fl.flowi_proto = IPPROTO_TCP; - fl.flowi_oif = inet6_iif(skb); - fl.fl6_dport = t1->dest; - fl.fl6_sport = t1->source; - security_skb_classify_flow(skb, &fl); + fl6.flowi6_proto = IPPROTO_TCP; + fl6.flowi6_oif = inet6_iif(skb); + fl6.uli.ports.dport = t1->dest; + fl6.uli.ports.sport = t1->source; + security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); /* Pass a socket to ip6_dst_lookup either it is for RST * Underlying function will use this to retrieve the network * namespace */ - dst = ip6_dst_lookup_flow(ctl_sk, &fl, NULL, false); + dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, 
false); if (!IS_ERR(dst)) { skb_dst_set(buff, dst); - ip6_xmit(ctl_sk, buff, &fl, NULL); + ip6_xmit(ctl_sk, buff, &fl6, NULL); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); if (rst) TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index dad035f..ce4b16f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -886,7 +886,7 @@ static int udp_v6_push_pending_frames(struct sock *sk) struct udphdr *uh; struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); - struct flowi *fl = &inet->cork.fl; + struct flowi6 *fl6 = &inet->cork.fl.u.ip6; int err = 0; int is_udplite = IS_UDPLITE(sk); __wsum csum = 0; @@ -899,23 +899,23 @@ static int udp_v6_push_pending_frames(struct sock *sk) * Create a UDP header */ uh = udp_hdr(skb); - uh->source = fl->fl6_sport; - uh->dest = fl->fl6_dport; + uh->source = fl6->uli.ports.sport; + uh->dest = fl6->uli.ports.dport; uh->len = htons(up->len); uh->check = 0; if (is_udplite) csum = udplite_csum_outgoing(sk, skb); else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ - udp6_hwcsum_outgoing(sk, skb, &fl->fl6_src, &fl->fl6_dst, + udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, up->len); goto send; } else csum = udp_csum_outgoing(sk, skb); /* add protocol-dependent pseudo-header */ - uh->check = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, - up->len, fl->flowi_proto, csum); + uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, + up->len, fl6->flowi6_proto, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; @@ -947,7 +947,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct in6_addr *daddr, *final_p, final; struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; - struct flowi fl; + struct flowi6 fl6; struct dst_entry *dst; int addr_len = msg->msg_namelen; int ulen = len; @@ -1030,19 +1030,19 @@ do_udp_sendmsg: } ulen += sizeof(struct udphdr); - memset(&fl, 0, sizeof(fl)); + memset(&fl6, 0, sizeof(fl6)); if (sin6) { if (sin6->sin6_port == 0) 
return -EINVAL; - fl.fl6_dport = sin6->sin6_port; + fl6.uli.ports.dport = sin6->sin6_port; daddr = &sin6->sin6_addr; if (np->sndflow) { - fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; - if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; + if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { + flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; daddr = &flowlabel->dst; @@ -1060,38 +1060,38 @@ do_udp_sendmsg: if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) - fl.flowi_oif = sin6->sin6_scope_id; + fl6.flowi6_oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - fl.fl6_dport = inet->inet_dport; + fl6.uli.ports.dport = inet->inet_dport; daddr = &np->daddr; - fl.fl6_flowlabel = np->flow_label; + fl6.flowlabel = np->flow_label; connected = 1; } - if (!fl.flowi_oif) - fl.flowi_oif = sk->sk_bound_dev_if; + if (!fl6.flowi6_oif) + fl6.flowi6_oif = sk->sk_bound_dev_if; - if (!fl.flowi_oif) - fl.flowi_oif = np->sticky_pktinfo.ipi6_ifindex; + if (!fl6.flowi6_oif) + fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; - fl.flowi_mark = sk->sk_mark; + fl6.flowi6_mark = sk->sk_mark; if (msg->msg_controllen) { opt = &opt_space; memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, + err = datagram_send_ctl(sock_net(sk), msg, &fl6, opt, &hlimit, &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; } - if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { + flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; } @@ -1105,27 +1105,27 @@ do_udp_sendmsg: opt = fl6_merge_options(&opt_space, 
flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); - fl.flowi_proto = sk->sk_protocol; + fl6.flowi6_proto = sk->sk_protocol; if (!ipv6_addr_any(daddr)) - ipv6_addr_copy(&fl.fl6_dst, daddr); + ipv6_addr_copy(&fl6.daddr, daddr); else - fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ - if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.fl6_sport = inet->inet_sport; + fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ + if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr)) + ipv6_addr_copy(&fl6.saddr, &np->saddr); + fl6.uli.ports.sport = inet->inet_sport; - final_p = fl6_update_dst(&fl, opt, &final); + final_p = fl6_update_dst(&fl6, opt, &final); if (final_p) connected = 0; - if (!fl.flowi_oif && ipv6_addr_is_multicast(&fl.fl6_dst)) { - fl.flowi_oif = np->mcast_oif; + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) { + fl6.flowi6_oif = np->mcast_oif; connected = 0; } - security_sk_classify_flow(sk, &fl); + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - dst = ip6_sk_dst_lookup_flow(sk, &fl, final_p, true); + dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p, true); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; @@ -1133,7 +1133,7 @@ do_udp_sendmsg: } if (hlimit < 0) { - if (ipv6_addr_is_multicast(&fl.fl6_dst)) + if (ipv6_addr_is_multicast(&fl6.daddr)) hlimit = np->mcast_hops; else hlimit = np->hop_limit; @@ -1168,7 +1168,7 @@ do_append_data: up->len += ulen; getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen, - sizeof(struct udphdr), hlimit, tclass, opt, &fl, + sizeof(struct udphdr), hlimit, tclass, opt, &fl6, (struct rt6_info*)dst, corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag); if (err) @@ -1181,10 +1181,10 @@ do_append_data: if (dst) { if (connected) { ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ? + ipv6_addr_equal(&fl6.daddr, &np->daddr) ? 
&np->daddr : NULL, #ifdef CONFIG_IPV6_SUBTREES - ipv6_addr_equal(&fl.fl6_src, &np->saddr) ? + ipv6_addr_equal(&fl6.saddr, &np->saddr) ? &np->saddr : #endif NULL); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 254aa6d..bef6200 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -39,8 +39,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, if (saddr) memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr)); - dst = ip6_route_output(net, NULL, - flowi6_to_flowi(&fl6)); + dst = ip6_route_output(net, NULL, &fl6); err = dst->error; if (dst->error) { diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index d07a32a..a60b20f 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -75,15 +75,13 @@ static int __ip_vs_addr_is_local_v6(struct net *net, const struct in6_addr *addr) { struct rt6_info *rt; - struct flowi fl = { - .flowi_oif = 0, - .fl6_dst = *addr, - .fl6_src = { .s6_addr32 = {0, 0, 0, 0} }, + struct flowi6 fl6 = { + .daddr = *addr, }; - rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl); + rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK)) - return 1; + return 1; return 0; } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 7dc00e3..6132b21 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -198,27 +198,27 @@ __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, struct in6_addr *ret_saddr, int do_xfrm) { struct dst_entry *dst; - struct flowi fl = { - .fl6_dst = *daddr, + struct flowi6 fl6 = { + .daddr = *daddr, }; - dst = ip6_route_output(net, NULL, &fl); + dst = ip6_route_output(net, NULL, &fl6); if (dst->error) goto out_err; if (!ret_saddr) return dst; - if (ipv6_addr_any(&fl.fl6_src) && + if (ipv6_addr_any(&fl6.saddr) && ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, - &fl.fl6_dst, 0, &fl.fl6_src) < 
0) + &fl6.daddr, 0, &fl6.saddr) < 0) goto out_err; if (do_xfrm) { - dst = xfrm_lookup(net, dst, &fl, NULL, 0); + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); if (IS_ERR(dst)) { dst = NULL; goto out_err; } } - ipv6_addr_copy(ret_saddr, &fl.fl6_src); + ipv6_addr_copy(ret_saddr, &fl6.saddr); return dst; out_err: diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index d8c00f9..5f054a0 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -143,18 +143,18 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info) const struct ipv6hdr *iph = ipv6_hdr(skb); struct net *net = pick_net(skb); struct dst_entry *dst; - struct flowi fl; + struct flowi6 fl6; - memset(&fl, 0, sizeof(fl)); + memset(&fl6, 0, sizeof(fl6)); if (info->priv) { if (info->priv->oif == -1) return false; - fl.flowi_oif = info->priv->oif; + fl6.flowi6_oif = info->priv->oif; } - fl.fl6_dst = info->gw.in6; - fl.fl6_flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | + fl6.daddr = info->gw.in6; + fl6.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]; - dst = ip6_route_output(net, NULL, &fl); + dst = ip6_route_output(net, NULL, &fl6); if (dst == NULL) return false; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 8316271..865ce7b 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -201,40 +201,40 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) { struct sock *sk = skb->sk; struct ipv6_pinfo *np = inet6_sk(sk); - struct flowi fl; + struct flowi6 fl6; - memset(&fl, 0, sizeof(fl)); + memset(&fl6, 0, sizeof(fl6)); - fl.flowi_proto = sk->sk_protocol; + fl6.flowi6_proto = sk->sk_protocol; /* Fill in the dest address from the route entry passed with the skb * and the source address from the transport. 
*/ - ipv6_addr_copy(&fl.fl6_dst, &transport->ipaddr.v6.sin6_addr); - ipv6_addr_copy(&fl.fl6_src, &transport->saddr.v6.sin6_addr); + ipv6_addr_copy(&fl6.daddr, &transport->ipaddr.v6.sin6_addr); + ipv6_addr_copy(&fl6.saddr, &transport->saddr.v6.sin6_addr); - fl.fl6_flowlabel = np->flow_label; - IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel); - if (ipv6_addr_type(&fl.fl6_src) & IPV6_ADDR_LINKLOCAL) - fl.flowi_oif = transport->saddr.v6.sin6_scope_id; + fl6.flowlabel = np->flow_label; + IP6_ECN_flow_xmit(sk, fl6.flowlabel); + if (ipv6_addr_type(&fl6.saddr) & IPV6_ADDR_LINKLOCAL) + fl6.flowi6_oif = transport->saddr.v6.sin6_scope_id; else - fl.flowi_oif = sk->sk_bound_dev_if; + fl6.flowi6_oif = sk->sk_bound_dev_if; if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; - ipv6_addr_copy(&fl.fl6_dst, rt0->addr); + ipv6_addr_copy(&fl6.daddr, rt0->addr); } SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb, skb->len, - &fl.fl6_src, &fl.fl6_dst); + &fl6.saddr, &fl6.daddr); SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS); if (!(transport->param_flags & SPP_PMTUD_ENABLE)) skb->local_df = 1; - return ip6_xmit(sk, skb, &fl, np->opt); + return ip6_xmit(sk, skb, &fl6, np->opt); } /* Returns the dst cache entry for the given source and destination ip @@ -245,22 +245,22 @@ static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc, union sctp_addr *saddr) { struct dst_entry *dst; - struct flowi fl; + struct flowi6 fl6; - memset(&fl, 0, sizeof(fl)); - ipv6_addr_copy(&fl.fl6_dst, &daddr->v6.sin6_addr); + memset(&fl6, 0, sizeof(fl6)); + ipv6_addr_copy(&fl6.daddr, &daddr->v6.sin6_addr); if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) - fl.flowi_oif = daddr->v6.sin6_scope_id; + fl6.flowi6_oif = daddr->v6.sin6_scope_id; - SCTP_DEBUG_PRINTK("%s: DST=%pI6 ", __func__, &fl.fl6_dst); + SCTP_DEBUG_PRINTK("%s: DST=%pI6 ", __func__, &fl6.daddr); if (saddr) { - ipv6_addr_copy(&fl.fl6_src, &saddr->v6.sin6_addr); - 
SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl.fl6_src); + ipv6_addr_copy(&fl6.saddr, &saddr->v6.sin6_addr); + SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl6.saddr); } - dst = ip6_route_output(&init_net, NULL, &fl); + dst = ip6_route_output(&init_net, NULL, &fl6); if (!dst->error) { struct rt6_info *rt; rt = (struct rt6_info *)dst; -- cgit v1.1 From 1958b856c1a59c0f1e892b92debb8c9fe4f364dc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 12 Mar 2011 16:36:19 -0500 Subject: net: Put fl6_* macros to struct flowi6 and use them again. Signed-off-by: David S. Miller --- net/dccp/ipv6.c | 20 ++++++++++---------- net/ipv6/af_inet6.c | 4 ++-- net/ipv6/datagram.c | 6 +++--- net/ipv6/icmp.c | 10 +++++----- net/ipv6/inet6_connection_sock.c | 8 ++++---- net/ipv6/mip6.c | 2 +- net/ipv6/netfilter/ip6t_REJECT.c | 4 ++-- net/ipv6/raw.c | 6 +++--- net/ipv6/syncookies.c | 4 ++-- net/ipv6/tcp_ipv6.c | 16 ++++++++-------- net/ipv6/udp.c | 10 +++++----- net/ipv6/xfrm6_policy.c | 12 ++++++------ 12 files changed, 51 insertions(+), 51 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 8d26c12..de1b7e3 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -158,8 +158,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ipv6_addr_copy(&fl6.daddr, &np->daddr); ipv6_addr_copy(&fl6.saddr, &np->saddr); fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.uli.ports.dport = inet->inet_dport; - fl6.uli.ports.sport = inet->inet_sport; + fl6.fl6_dport = inet->inet_dport; + fl6.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false); @@ -253,8 +253,8 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr); fl6.flowlabel = 0; fl6.flowi6_oif = ireq6->iif; - fl6.uli.ports.dport = inet_rsk(req)->rmt_port; - fl6.uli.ports.sport = inet_rsk(req)->loc_port; + fl6.fl6_dport = inet_rsk(req)->rmt_port; + fl6.fl6_sport = 
inet_rsk(req)->loc_port; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); opt = np->opt; @@ -323,8 +323,8 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) fl6.flowi6_proto = IPPROTO_DCCP; fl6.flowi6_oif = inet6_iif(rxskb); - fl6.uli.ports.dport = dccp_hdr(skb)->dccph_dport; - fl6.uli.ports.sport = dccp_hdr(skb)->dccph_sport; + fl6.fl6_dport = dccp_hdr(skb)->dccph_dport; + fl6.fl6_sport = dccp_hdr(skb)->dccph_sport; security_skb_classify_flow(rxskb, flowi6_to_flowi(&fl6)); /* sk = NULL, but it is safe for now. RST socket required. */ @@ -535,8 +535,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, final_p = fl6_update_dst(&fl6, opt, &final); ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr); fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.uli.ports.dport = inet_rsk(req)->rmt_port; - fl6.uli.ports.sport = inet_rsk(req)->loc_port; + fl6.fl6_dport = inet_rsk(req)->rmt_port; + fl6.fl6_sport = inet_rsk(req)->loc_port; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); @@ -957,8 +957,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ipv6_addr_copy(&fl6.daddr, &np->daddr); ipv6_addr_copy(&fl6.saddr, saddr ? 
saddr : &np->saddr); fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.uli.ports.dport = usin->sin6_port; - fl6.uli.ports.sport = inet->inet_sport; + fl6.fl6_dport = usin->sin6_port; + fl6.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); final_p = fl6_update_dst(&fl6, np->opt, &final); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 689eea6..4b13d5d 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -661,8 +661,8 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.flowlabel = np->flow_label; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; - fl6.uli.ports.dport = inet->inet_dport; - fl6.uli.ports.sport = inet->inet_sport; + fl6.fl6_dport = inet->inet_dport; + fl6.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); final_p = fl6_update_dst(&fl6, np->opt, &final); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 04ae676..1656033 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -151,8 +151,8 @@ ipv4_connected: ipv6_addr_copy(&fl6.saddr, &np->saddr); fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; - fl6.uli.ports.dport = inet->inet_dport; - fl6.uli.ports.sport = inet->inet_sport; + fl6.fl6_dport = inet->inet_dport; + fl6.fl6_sport = inet->inet_sport; if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST)) fl6.flowi6_oif = np->mcast_oif; @@ -261,7 +261,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info) serr->ee.ee_info = info; serr->ee.ee_data = 0; serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb); - serr->port = fl6->uli.ports.dport; + serr->port = fl6->fl6_dport; __skb_pull(skb, skb_tail_pointer(skb) - skb->data); skb_reset_transport_header(skb); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index f7b9041..83cb4f9 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -448,8 +448,8 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) if (saddr) 
ipv6_addr_copy(&fl6.saddr, saddr); fl6.flowi6_oif = iif; - fl6.uli.icmpt.type = type; - fl6.uli.icmpt.code = code; + fl6.fl6_icmp_type = type; + fl6.fl6_icmp_code = code; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); @@ -544,7 +544,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (saddr) ipv6_addr_copy(&fl6.saddr, saddr); fl6.flowi6_oif = skb->dev->ifindex; - fl6.uli.icmpt.type = ICMPV6_ECHO_REPLY; + fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); @@ -794,8 +794,8 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6, ipv6_addr_copy(&fl6->saddr, saddr); ipv6_addr_copy(&fl6->daddr, daddr); fl6->flowi6_proto = IPPROTO_ICMPV6; - fl6->uli.icmpt.type = type; - fl6->uli.icmpt.code = 0; + fl6->fl6_icmp_type = type; + fl6->fl6_icmp_code = 0; fl6->flowi6_oif = oif; security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); } diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 27d6691..1660546 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -70,8 +70,8 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, ipv6_addr_copy(&fl6.saddr, &treq->loc_addr); fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; - fl6.uli.ports.dport = inet_rsk(req)->rmt_port; - fl6.uli.ports.sport = inet_rsk(req)->loc_port; + fl6.fl6_dport = inet_rsk(req)->rmt_port; + fl6.fl6_sport = inet_rsk(req)->loc_port; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); @@ -220,8 +220,8 @@ int inet6_csk_xmit(struct sk_buff *skb) IP6_ECN_flow_xmit(sk, fl6.flowlabel); fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; - fl6.uli.ports.sport = inet->inet_sport; - fl6.uli.ports.dport = inet->inet_dport; + fl6.fl6_sport = inet->inet_sport; + fl6.fl6_dport = inet->inet_dport; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); 
final_p = fl6_update_dst(&fl6, np->opt, &final); diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 6a13735..9b21048 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -216,7 +216,7 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, int err = 0; if (unlikely(fl6->flowi6_proto == IPPROTO_MH && - fl6->uli.mht.type <= IP6_MH_TYPE_MAX)) + fl6->fl6_mh_type <= IP6_MH_TYPE_MAX)) goto out; if (likely(opt->dsthao)) { diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index df05511..28e7448 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -93,8 +93,8 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) fl6.flowi6_proto = IPPROTO_TCP; ipv6_addr_copy(&fl6.saddr, &oip6h->daddr); ipv6_addr_copy(&fl6.daddr, &oip6h->saddr); - fl6.uli.ports.sport = otcph.dest; - fl6.uli.ports.dport = otcph.source; + fl6.fl6_sport = otcph.dest; + fl6.fl6_dport = otcph.source; security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); dst = ip6_route_output(net, NULL, &fl6); if (dst == NULL || dst->error) { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 259f1b2..4a1c3b4 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -693,8 +693,8 @@ static int rawv6_probe_proto_opt(struct flowi6 *fl6, struct msghdr *msg) code = iov->iov_base; if (type && code) { - if (get_user(fl6->uli.icmpt.type, type) || - get_user(fl6->uli.icmpt.code, code)) + if (get_user(fl6->fl6_icmp_type, type) || + get_user(fl6->fl6_icmp_code, code)) return -EFAULT; probed = 1; } @@ -705,7 +705,7 @@ static int rawv6_probe_proto_opt(struct flowi6 *fl6, struct msghdr *msg) /* check if type field is readable or not. 
*/ if (iov->iov_len > 2 - len) { u8 __user *p = iov->iov_base; - if (get_user(fl6->uli.mht.type, &p[2 - len])) + if (get_user(fl6->fl6_mh_type, &p[2 - len])) return -EFAULT; probed = 1; } else diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 97858d5..352c260 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -240,8 +240,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr); fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; - fl6.uli.ports.dport = inet_rsk(req)->rmt_port; - fl6.uli.ports.sport = inet_sk(sk)->inet_sport; + fl6.fl6_dport = inet_rsk(req)->rmt_port; + fl6.fl6_sport = inet_sk(sk)->inet_sport; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7ed0ba1..2b0c186 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -248,8 +248,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, (saddr ? 
saddr : &np->saddr)); fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; - fl6.uli.ports.dport = usin->sin6_port; - fl6.uli.ports.sport = inet->inet_sport; + fl6.fl6_dport = usin->sin6_port; + fl6.fl6_sport = inet->inet_sport; final_p = fl6_update_dst(&fl6, np->opt, &final); @@ -401,8 +401,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ipv6_addr_copy(&fl6.saddr, &np->saddr); fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; - fl6.uli.ports.dport = inet->inet_dport; - fl6.uli.ports.sport = inet->inet_sport; + fl6.fl6_dport = inet->inet_dport; + fl6.fl6_sport = inet->inet_sport; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false); @@ -493,8 +493,8 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, fl6.flowlabel = 0; fl6.flowi6_oif = treq->iif; fl6.flowi6_mark = sk->sk_mark; - fl6.uli.ports.dport = inet_rsk(req)->rmt_port; - fl6.uli.ports.sport = inet_rsk(req)->loc_port; + fl6.fl6_dport = inet_rsk(req)->rmt_port; + fl6.fl6_sport = inet_rsk(req)->loc_port; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); opt = np->opt; @@ -1057,8 +1057,8 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, fl6.flowi6_proto = IPPROTO_TCP; fl6.flowi6_oif = inet6_iif(skb); - fl6.uli.ports.dport = t1->dest; - fl6.uli.ports.sport = t1->source; + fl6.fl6_dport = t1->dest; + fl6.fl6_sport = t1->source; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); /* Pass a socket to ip6_dst_lookup either it is for RST diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index ce4b16f..d7037c0 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -899,8 +899,8 @@ static int udp_v6_push_pending_frames(struct sock *sk) * Create a UDP header */ uh = udp_hdr(skb); - uh->source = fl6->uli.ports.sport; - uh->dest = fl6->uli.ports.dport; + uh->source = fl6->fl6_sport; + uh->dest = fl6->fl6_dport; uh->len = htons(up->len); uh->check = 0; 
@@ -1036,7 +1036,7 @@ do_udp_sendmsg: if (sin6->sin6_port == 0) return -EINVAL; - fl6.uli.ports.dport = sin6->sin6_port; + fl6.fl6_dport = sin6->sin6_port; daddr = &sin6->sin6_addr; if (np->sndflow) { @@ -1065,7 +1065,7 @@ do_udp_sendmsg: if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - fl6.uli.ports.dport = inet->inet_dport; + fl6.fl6_dport = inet->inet_dport; daddr = &np->daddr; fl6.flowlabel = np->flow_label; connected = 1; @@ -1112,7 +1112,7 @@ do_udp_sendmsg: fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr)) ipv6_addr_copy(&fl6.saddr, &np->saddr); - fl6.uli.ports.sport = inet->inet_sport; + fl6.fl6_sport = inet->inet_sport; final_p = fl6_update_dst(&fl6, opt, &final); if (final_p) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index bef6200..05e34c8 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -160,8 +160,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, nh + offset + 4 - skb->data))) { __be16 *ports = (__be16 *)exthdr; - fl6->uli.ports.sport = ports[!!reverse]; - fl6->uli.ports.dport = ports[!reverse]; + fl6->fl6_sport = ports[!!reverse]; + fl6->fl6_dport = ports[!reverse]; } fl6->flowi6_proto = nexthdr; return; @@ -170,8 +170,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) { u8 *icmp = (u8 *)exthdr; - fl6->uli.icmpt.type = icmp[0]; - fl6->uli.icmpt.code = icmp[1]; + fl6->fl6_icmp_type = icmp[0]; + fl6->fl6_icmp_code = icmp[1]; } fl6->flowi6_proto = nexthdr; return; @@ -182,7 +182,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) struct ip6_mh *mh; mh = (struct ip6_mh *)exthdr; - fl6->uli.mht.type = mh->ip6mh_type; + fl6->fl6_mh_type = mh->ip6mh_type; } fl6->flowi6_proto = nexthdr; return; @@ -193,7 +193,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) 
case IPPROTO_ESP: case IPPROTO_COMP: default: - fl6->uli.spi = 0; + fl6->fl6_ipsec_spi = 0; fl6->flowi6_proto = nexthdr; return; } -- cgit v1.1 From bef55aebd560c5a6f8883c421abccee39978c58c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 12 Mar 2011 17:17:10 -0500 Subject: decnet: Convert to use flowidn where applicable. Signed-off-by: David S. Miller --- net/decnet/af_decnet.c | 16 +-- net/decnet/dn_fib.c | 23 +++-- net/decnet/dn_nsp_out.c | 16 +-- net/decnet/dn_route.c | 269 ++++++++++++++++++++++++------------------------ net/decnet/dn_rules.c | 17 +-- net/decnet/dn_table.c | 6 +- 6 files changed, 177 insertions(+), 170 deletions(-) (limited to 'net') diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index aafd15a..ea3b6ee 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -908,7 +908,7 @@ static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen, struct socket *sock = sk->sk_socket; struct dn_scp *scp = DN_SK(sk); int err = -EISCONN; - struct flowi fl; + struct flowidn fld; if (sock->state == SS_CONNECTED) goto out; @@ -947,13 +947,13 @@ static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen, memcpy(&scp->peer, addr, sizeof(struct sockaddr_dn)); err = -EHOSTUNREACH; - memset(&fl, 0, sizeof(fl)); - fl.flowi_oif = sk->sk_bound_dev_if; - fl.fld_dst = dn_saddr2dn(&scp->peer); - fl.fld_src = dn_saddr2dn(&scp->addr); - dn_sk_ports_copy(&fl, scp); - fl.flowi_proto = DNPROTO_NSP; - if (dn_route_output_sock(&sk->sk_dst_cache, &fl, sk, flags) < 0) + memset(&fld, 0, sizeof(fld)); + fld.flowidn_oif = sk->sk_bound_dev_if; + fld.daddr = dn_saddr2dn(&scp->peer); + fld.saddr = dn_saddr2dn(&scp->addr); + dn_sk_ports_copy(&fld, scp); + fld.flowidn_proto = DNPROTO_NSP; + if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, flags) < 0) goto out; sk->sk_route_caps = sk->sk_dst_cache->dev->features; sock->state = SS_CONNECTING; diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 
4dfffa0..1c74ed3 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -201,7 +201,7 @@ static int dn_fib_check_nh(const struct rtmsg *r, struct dn_fib_info *fi, struct int err; if (nh->nh_gw) { - struct flowi fl; + struct flowidn fld; struct dn_fib_res res; if (nh->nh_flags&RTNH_F_ONLINK) { @@ -221,15 +221,15 @@ static int dn_fib_check_nh(const struct rtmsg *r, struct dn_fib_info *fi, struct return 0; } - memset(&fl, 0, sizeof(fl)); - fl.fld_dst = nh->nh_gw; - fl.flowi_oif = nh->nh_oif; - fl.fld_scope = r->rtm_scope + 1; + memset(&fld, 0, sizeof(fld)); + fld.daddr = nh->nh_gw; + fld.flowidn_oif = nh->nh_oif; + fld.flowidn_scope = r->rtm_scope + 1; - if (fl.fld_scope < RT_SCOPE_LINK) - fl.fld_scope = RT_SCOPE_LINK; + if (fld.flowidn_scope < RT_SCOPE_LINK) + fld.flowidn_scope = RT_SCOPE_LINK; - if ((err = dn_fib_lookup(&fl, &res)) != 0) + if ((err = dn_fib_lookup(&fld, &res)) != 0) return err; err = -EINVAL; @@ -404,7 +404,7 @@ failure: return NULL; } -int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowi *fl, struct dn_fib_res *res) +int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowidn *fld, struct dn_fib_res *res) { int err = dn_fib_props[type].error; @@ -424,7 +424,8 @@ int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowi * for_nexthops(fi) { if (nh->nh_flags & RTNH_F_DEAD) continue; - if (!fl->flowi_oif || fl->flowi_oif == nh->nh_oif) + if (!fld->flowidn_oif || + fld->flowidn_oif == nh->nh_oif) break; } if (nhsel < fi->fib_nhs) { @@ -445,7 +446,7 @@ int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowi * return err; } -void dn_fib_select_multipath(const struct flowi *fl, struct dn_fib_res *res) +void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res) { struct dn_fib_info *fi = res->fi; int w; diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index b3d6674..bd78836 100644 --- a/net/decnet/dn_nsp_out.c +++ 
b/net/decnet/dn_nsp_out.c @@ -78,7 +78,7 @@ static void dn_nsp_send(struct sk_buff *skb) struct sock *sk = skb->sk; struct dn_scp *scp = DN_SK(sk); struct dst_entry *dst; - struct flowi fl; + struct flowidn fld; skb_reset_transport_header(skb); scp->stamp = jiffies; @@ -91,13 +91,13 @@ try_again: return; } - memset(&fl, 0, sizeof(fl)); - fl.flowi_oif = sk->sk_bound_dev_if; - fl.fld_src = dn_saddr2dn(&scp->addr); - fl.fld_dst = dn_saddr2dn(&scp->peer); - dn_sk_ports_copy(&fl, scp); - fl.flowi_proto = DNPROTO_NSP; - if (dn_route_output_sock(&sk->sk_dst_cache, &fl, sk, 0) == 0) { + memset(&fld, 0, sizeof(fld)); + fld.flowidn_oif = sk->sk_bound_dev_if; + fld.saddr = dn_saddr2dn(&scp->addr); + fld.daddr = dn_saddr2dn(&scp->peer); + dn_sk_ports_copy(&fld, scp); + fld.flowidn_proto = DNPROTO_NSP; + if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, 0) == 0) { dst = sk_dst_get(sk); sk->sk_route_caps = dst->dev->features; goto try_again; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index d74d34b..9f09d4f 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -282,14 +282,14 @@ static void dn_dst_link_failure(struct sk_buff *skb) { } -static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) +static inline int compare_keys(struct flowidn *fl1, struct flowidn *fl2) { - return ((fl1->fld_dst ^ fl2->fld_dst) | - (fl1->fld_src ^ fl2->fld_src) | - (fl1->flowi_mark ^ fl2->flowi_mark) | - (fl1->fld_scope ^ fl2->fld_scope) | - (fl1->flowi_oif ^ fl2->flowi_oif) | - (fl1->flowi_iif ^ fl2->flowi_iif)) == 0; + return ((fl1->daddr ^ fl2->daddr) | + (fl1->saddr ^ fl2->saddr) | + (fl1->flowidn_mark ^ fl2->flowidn_mark) | + (fl1->flowidn_scope ^ fl2->flowidn_scope) | + (fl1->flowidn_oif ^ fl2->flowidn_oif) | + (fl1->flowidn_iif ^ fl2->flowidn_iif)) == 0; } static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route **rp) @@ -303,7 +303,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route * 
spin_lock_bh(&dn_rt_hash_table[hash].lock); while ((rth = rcu_dereference_protected(*rthp, lockdep_is_held(&dn_rt_hash_table[hash].lock))) != NULL) { - if (compare_keys(&rth->fl, &rt->fl)) { + if (compare_keys(&rth->fld, &rt->fld)) { /* Put it first */ *rthp = rth->dst.dn_next; rcu_assign_pointer(rth->dst.dn_next, @@ -903,15 +903,15 @@ static inline __le16 dn_fib_rules_map_destination(__le16 daddr, struct dn_fib_re return (daddr&~mask)|res->fi->fib_nh->nh_gw; } -static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *oldflp, int try_hard) +static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *oldflp, int try_hard) { - struct flowi fl = { - .fld_dst = oldflp->fld_dst, - .fld_src = oldflp->fld_src, - .fld_scope = RT_SCOPE_UNIVERSE, - .flowi_mark = oldflp->flowi_mark, - .flowi_iif = init_net.loopback_dev->ifindex, - .flowi_oif = oldflp->flowi_oif, + struct flowidn fld = { + .daddr = oldflp->daddr, + .saddr = oldflp->saddr, + .flowidn_scope = RT_SCOPE_UNIVERSE, + .flowidn_mark = oldflp->flowidn_mark, + .flowidn_iif = init_net.loopback_dev->ifindex, + .flowidn_oif = oldflp->flowidn_oif, }; struct dn_route *rt = NULL; struct net_device *dev_out = NULL, *dev; @@ -926,13 +926,14 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old if (decnet_debug_level & 16) printk(KERN_DEBUG "dn_route_output_slow: dst=%04x src=%04x mark=%d" - " iif=%d oif=%d\n", le16_to_cpu(oldflp->fld_dst), - le16_to_cpu(oldflp->fld_src), - oldflp->flowi_mark, init_net.loopback_dev->ifindex, oldflp->flowi_oif); + " iif=%d oif=%d\n", le16_to_cpu(oldflp->daddr), + le16_to_cpu(oldflp->saddr), + oldflp->flowidn_mark, init_net.loopback_dev->ifindex, + oldflp->flowidn_oif); /* If we have an output interface, verify its a DECnet device */ - if (oldflp->flowi_oif) { - dev_out = dev_get_by_index(&init_net, oldflp->flowi_oif); + if (oldflp->flowidn_oif) { + dev_out = dev_get_by_index(&init_net, oldflp->flowidn_oif); err = -ENODEV; if 
(dev_out && dev_out->dn_ptr == NULL) { dev_put(dev_out); @@ -943,11 +944,11 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old } /* If we have a source address, verify that its a local address */ - if (oldflp->fld_src) { + if (oldflp->saddr) { err = -EADDRNOTAVAIL; if (dev_out) { - if (dn_dev_islocal(dev_out, oldflp->fld_src)) + if (dn_dev_islocal(dev_out, oldflp->saddr)) goto source_ok; dev_put(dev_out); goto out; @@ -956,11 +957,11 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old for_each_netdev_rcu(&init_net, dev) { if (!dev->dn_ptr) continue; - if (!dn_dev_islocal(dev, oldflp->fld_src)) + if (!dn_dev_islocal(dev, oldflp->saddr)) continue; if ((dev->flags & IFF_LOOPBACK) && - oldflp->fld_dst && - !dn_dev_islocal(dev, oldflp->fld_dst)) + oldflp->daddr && + !dn_dev_islocal(dev, oldflp->daddr)) continue; dev_out = dev; @@ -975,22 +976,22 @@ source_ok: } /* No destination? Assume its local */ - if (!fl.fld_dst) { - fl.fld_dst = fl.fld_src; + if (!fld.daddr) { + fld.daddr = fld.saddr; err = -EADDRNOTAVAIL; if (dev_out) dev_put(dev_out); dev_out = init_net.loopback_dev; dev_hold(dev_out); - if (!fl.fld_dst) { - fl.fld_dst = - fl.fld_src = dnet_select_source(dev_out, 0, + if (!fld.daddr) { + fld.daddr = + fld.saddr = dnet_select_source(dev_out, 0, RT_SCOPE_HOST); - if (!fl.fld_dst) + if (!fld.daddr) goto out; } - fl.flowi_oif = init_net.loopback_dev->ifindex; + fld.flowidn_oif = init_net.loopback_dev->ifindex; res.type = RTN_LOCAL; goto make_route; } @@ -999,8 +1000,8 @@ source_ok: printk(KERN_DEBUG "dn_route_output_slow: initial checks complete." " dst=%o4x src=%04x oif=%d try_hard=%d\n", - le16_to_cpu(fl.fld_dst), le16_to_cpu(fl.fld_src), - fl.flowi_oif, try_hard); + le16_to_cpu(fld.daddr), le16_to_cpu(fld.saddr), + fld.flowidn_oif, try_hard); /* * N.B. If the kernel is compiled without router support then @@ -1008,7 +1009,7 @@ source_ok: * will always be executed. 
*/ err = -ESRCH; - if (try_hard || (err = dn_fib_lookup(&fl, &res)) != 0) { + if (try_hard || (err = dn_fib_lookup(&fld, &res)) != 0) { struct dn_dev *dn_db; if (err != -ESRCH) goto out; @@ -1023,19 +1024,19 @@ source_ok: * here */ if (!try_hard) { - neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fl.fld_dst); + neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fld.daddr); if (neigh) { - if ((oldflp->flowi_oif && - (neigh->dev->ifindex != oldflp->flowi_oif)) || - (oldflp->fld_src && + if ((oldflp->flowidn_oif && + (neigh->dev->ifindex != oldflp->flowidn_oif)) || + (oldflp->saddr && (!dn_dev_islocal(neigh->dev, - oldflp->fld_src)))) { + oldflp->saddr)))) { neigh_release(neigh); neigh = NULL; } else { if (dev_out) dev_put(dev_out); - if (dn_dev_islocal(neigh->dev, fl.fld_dst)) { + if (dn_dev_islocal(neigh->dev, fld.daddr)) { dev_out = init_net.loopback_dev; res.type = RTN_LOCAL; } else { @@ -1055,7 +1056,7 @@ source_ok: goto out; dn_db = rcu_dereference_raw(dev_out->dn_ptr); /* Possible improvement - check all devices for local addr */ - if (dn_dev_islocal(dev_out, fl.fld_dst)) { + if (dn_dev_islocal(dev_out, fld.daddr)) { dev_put(dev_out); dev_out = init_net.loopback_dev; dev_hold(dev_out); @@ -1071,16 +1072,16 @@ select_source: if (neigh) gateway = ((struct dn_neigh *)neigh)->addr; if (gateway == 0) - gateway = fl.fld_dst; - if (fl.fld_src == 0) { - fl.fld_src = dnet_select_source(dev_out, gateway, - res.type == RTN_LOCAL ? - RT_SCOPE_HOST : - RT_SCOPE_LINK); - if (fl.fld_src == 0 && res.type != RTN_LOCAL) + gateway = fld.daddr; + if (fld.saddr == 0) { + fld.saddr = dnet_select_source(dev_out, gateway, + res.type == RTN_LOCAL ? 
+ RT_SCOPE_HOST : + RT_SCOPE_LINK); + if (fld.saddr == 0 && res.type != RTN_LOCAL) goto e_addr; } - fl.flowi_oif = dev_out->ifindex; + fld.flowidn_oif = dev_out->ifindex; goto make_route; } free_res = 1; @@ -1089,35 +1090,35 @@ select_source: goto e_inval; if (res.type == RTN_LOCAL) { - if (!fl.fld_src) - fl.fld_src = fl.fld_dst; + if (!fld.saddr) + fld.saddr = fld.daddr; if (dev_out) dev_put(dev_out); dev_out = init_net.loopback_dev; dev_hold(dev_out); - fl.flowi_oif = dev_out->ifindex; + fld.flowidn_oif = dev_out->ifindex; if (res.fi) dn_fib_info_put(res.fi); res.fi = NULL; goto make_route; } - if (res.fi->fib_nhs > 1 && fl.flowi_oif == 0) - dn_fib_select_multipath(&fl, &res); + if (res.fi->fib_nhs > 1 && fld.flowidn_oif == 0) + dn_fib_select_multipath(&fld, &res); /* * We could add some logic to deal with default routes here and * get rid of some of the special casing above. */ - if (!fl.fld_src) - fl.fld_src = DN_FIB_RES_PREFSRC(res); + if (!fld.saddr) + fld.saddr = DN_FIB_RES_PREFSRC(res); if (dev_out) dev_put(dev_out); dev_out = DN_FIB_RES_DEV(res); dev_hold(dev_out); - fl.flowi_oif = dev_out->ifindex; + fld.flowidn_oif = dev_out->ifindex; gateway = DN_FIB_RES_GW(res); make_route: @@ -1131,19 +1132,19 @@ make_route: atomic_set(&rt->dst.__refcnt, 1); rt->dst.flags = DST_HOST; - rt->fl.fld_src = oldflp->fld_src; - rt->fl.fld_dst = oldflp->fld_dst; - rt->fl.flowi_oif = oldflp->flowi_oif; - rt->fl.flowi_iif = 0; - rt->fl.flowi_mark = oldflp->flowi_mark; + rt->fld.saddr = oldflp->saddr; + rt->fld.daddr = oldflp->daddr; + rt->fld.flowidn_oif = oldflp->flowidn_oif; + rt->fld.flowidn_iif = 0; + rt->fld.flowidn_mark = oldflp->flowidn_mark; - rt->rt_saddr = fl.fld_src; - rt->rt_daddr = fl.fld_dst; - rt->rt_gateway = gateway ? gateway : fl.fld_dst; - rt->rt_local_src = fl.fld_src; + rt->rt_saddr = fld.saddr; + rt->rt_daddr = fld.daddr; + rt->rt_gateway = gateway ? 
gateway : fld.daddr; + rt->rt_local_src = fld.saddr; - rt->rt_dst_map = fl.fld_dst; - rt->rt_src_map = fl.fld_src; + rt->rt_dst_map = fld.daddr; + rt->rt_src_map = fld.saddr; rt->dst.dev = dev_out; dev_hold(dev_out); @@ -1161,7 +1162,7 @@ make_route: if (err) goto e_neighbour; - hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst); + hash = dn_hash(rt->fld.saddr, rt->fld.daddr); dn_insert_route(rt, hash, (struct dn_route **)pprt); done: @@ -1192,20 +1193,20 @@ e_neighbour: /* * N.B. The flags may be moved into the flowi at some future stage. */ -static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *flp, int flags) +static int __dn_route_output_key(struct dst_entry **pprt, const struct flowidn *flp, int flags) { - unsigned hash = dn_hash(flp->fld_src, flp->fld_dst); + unsigned hash = dn_hash(flp->saddr, flp->daddr); struct dn_route *rt = NULL; if (!(flags & MSG_TRYHARD)) { rcu_read_lock_bh(); for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt; rt = rcu_dereference_bh(rt->dst.dn_next)) { - if ((flp->fld_dst == rt->fl.fld_dst) && - (flp->fld_src == rt->fl.fld_src) && - (flp->flowi_mark == rt->fl.flowi_mark) && + if ((flp->daddr == rt->fld.daddr) && + (flp->saddr == rt->fld.saddr) && + (flp->flowidn_mark == rt->fld.flowidn_mark) && dn_is_output_route(rt) && - (rt->fl.flowi_oif == flp->flowi_oif)) { + (rt->fld.flowidn_oif == flp->flowidn_oif)) { dst_use(&rt->dst, jiffies); rcu_read_unlock_bh(); *pprt = &rt->dst; @@ -1218,13 +1219,14 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl return dn_route_output_slow(pprt, flp, flags); } -static int dn_route_output_key(struct dst_entry **pprt, struct flowi *flp, int flags) +static int dn_route_output_key(struct dst_entry **pprt, struct flowidn *flp, int flags) { int err; err = __dn_route_output_key(pprt, flp, flags); - if (err == 0 && flp->flowi_proto) { - *pprt = xfrm_lookup(&init_net, *pprt, flp, NULL, 0); + if (err == 0 && flp->flowidn_proto) { + *pprt = 
xfrm_lookup(&init_net, *pprt, + flowidn_to_flowi(flp), NULL, 0); if (IS_ERR(*pprt)) { err = PTR_ERR(*pprt); *pprt = NULL; @@ -1233,15 +1235,16 @@ static int dn_route_output_key(struct dst_entry **pprt, struct flowi *flp, int f return err; } -int dn_route_output_sock(struct dst_entry **pprt, struct flowi *fl, struct sock *sk, int flags) +int dn_route_output_sock(struct dst_entry **pprt, struct flowidn *fl, struct sock *sk, int flags) { int err; err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD); - if (err == 0 && fl->flowi_proto) { + if (err == 0 && fl->flowidn_proto) { if (!(flags & MSG_DONTWAIT)) - fl->flowi_flags |= FLOWI_FLAG_CAN_SLEEP; - *pprt = xfrm_lookup(&init_net, *pprt, fl, sk, 0); + fl->flowidn_flags |= FLOWI_FLAG_CAN_SLEEP; + *pprt = xfrm_lookup(&init_net, *pprt, + flowidn_to_flowi(fl), sk, 0); if (IS_ERR(*pprt)) { err = PTR_ERR(*pprt); *pprt = NULL; @@ -1262,12 +1265,12 @@ static int dn_route_input_slow(struct sk_buff *skb) int flags = 0; __le16 gateway = 0; __le16 local_src = 0; - struct flowi fl = { - .fld_dst = cb->dst, - .fld_src = cb->src, - .fld_scope = RT_SCOPE_UNIVERSE, - .flowi_mark = skb->mark, - .flowi_iif = skb->dev->ifindex, + struct flowidn fld = { + .daddr = cb->dst, + .saddr = cb->src, + .flowidn_scope = RT_SCOPE_UNIVERSE, + .flowidn_mark = skb->mark, + .flowidn_iif = skb->dev->ifindex, }; struct dn_fib_res res = { .fi = NULL, .type = RTN_UNREACHABLE }; int err = -EINVAL; @@ -1279,7 +1282,7 @@ static int dn_route_input_slow(struct sk_buff *skb) goto out; /* Zero source addresses are not allowed */ - if (fl.fld_src == 0) + if (fld.saddr == 0) goto out; /* @@ -1293,7 +1296,7 @@ static int dn_route_input_slow(struct sk_buff *skb) if (dn_dev_islocal(in_dev, cb->src)) goto out; - err = dn_fib_lookup(&fl, &res); + err = dn_fib_lookup(&fld, &res); if (err) { if (err != -ESRCH) goto out; @@ -1305,7 +1308,7 @@ static int dn_route_input_slow(struct sk_buff *skb) res.type = RTN_LOCAL; } else { - __le16 src_map = fl.fld_src; + __le16 src_map 
= fld.saddr; free_res = 1; out_dev = DN_FIB_RES_DEV(res); @@ -1318,22 +1321,22 @@ static int dn_route_input_slow(struct sk_buff *skb) dev_hold(out_dev); if (res.r) - src_map = fl.fld_src; /* no NAT support for now */ + src_map = fld.saddr; /* no NAT support for now */ gateway = DN_FIB_RES_GW(res); if (res.type == RTN_NAT) { - fl.fld_dst = dn_fib_rules_map_destination(fl.fld_dst, &res); + fld.daddr = dn_fib_rules_map_destination(fld.daddr, &res); dn_fib_res_put(&res); free_res = 0; - if (dn_fib_lookup(&fl, &res)) + if (dn_fib_lookup(&fld, &res)) goto e_inval; free_res = 1; if (res.type != RTN_UNICAST) goto e_inval; flags |= RTCF_DNAT; - gateway = fl.fld_dst; + gateway = fld.daddr; } - fl.fld_src = src_map; + fld.saddr = src_map; } switch(res.type) { @@ -1347,8 +1350,8 @@ static int dn_route_input_slow(struct sk_buff *skb) if (dn_db->parms.forwarding == 0) goto e_inval; - if (res.fi->fib_nhs > 1 && fl.flowi_oif == 0) - dn_fib_select_multipath(&fl, &res); + if (res.fi->fib_nhs > 1 && fld.flowidn_oif == 0) + dn_fib_select_multipath(&fld, &res); /* * Check for out_dev == in_dev. We use the RTCF_DOREDIRECT @@ -1366,8 +1369,8 @@ static int dn_route_input_slow(struct sk_buff *skb) break; case RTN_LOCAL: flags |= RTCF_LOCAL; - fl.fld_src = cb->dst; - fl.fld_dst = cb->src; + fld.saddr = cb->dst; + fld.daddr = cb->src; /* Routing tables gave us a gateway */ if (gateway) @@ -1400,21 +1403,21 @@ make_route: if (rt == NULL) goto e_nobufs; - rt->rt_saddr = fl.fld_src; - rt->rt_daddr = fl.fld_dst; - rt->rt_gateway = fl.fld_dst; + rt->rt_saddr = fld.saddr; + rt->rt_daddr = fld.daddr; + rt->rt_gateway = fld.daddr; if (gateway) rt->rt_gateway = gateway; rt->rt_local_src = local_src ? 
local_src : rt->rt_saddr; - rt->rt_dst_map = fl.fld_dst; - rt->rt_src_map = fl.fld_src; + rt->rt_dst_map = fld.daddr; + rt->rt_src_map = fld.saddr; - rt->fl.fld_src = cb->src; - rt->fl.fld_dst = cb->dst; - rt->fl.flowi_oif = 0; - rt->fl.flowi_iif = in_dev->ifindex; - rt->fl.flowi_mark = fl.flowi_mark; + rt->fld.saddr = cb->src; + rt->fld.daddr = cb->dst; + rt->fld.flowidn_oif = 0; + rt->fld.flowidn_iif = in_dev->ifindex; + rt->fld.flowidn_mark = fld.flowidn_mark; rt->dst.flags = DST_HOST; rt->dst.neighbour = neigh; @@ -1444,7 +1447,7 @@ make_route: if (err) goto e_neighbour; - hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst); + hash = dn_hash(rt->fld.saddr, rt->fld.daddr); dn_insert_route(rt, hash, &rt); skb_dst_set(skb, &rt->dst); @@ -1484,11 +1487,11 @@ static int dn_route_input(struct sk_buff *skb) rcu_read_lock(); for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL; rt = rcu_dereference(rt->dst.dn_next)) { - if ((rt->fl.fld_src == cb->src) && - (rt->fl.fld_dst == cb->dst) && - (rt->fl.flowi_oif == 0) && - (rt->fl.flowi_mark == skb->mark) && - (rt->fl.flowi_iif == cb->iif)) { + if ((rt->fld.saddr == cb->src) && + (rt->fld.daddr == cb->dst) && + (rt->fld.flowidn_oif == 0) && + (rt->fld.flowidn_mark == skb->mark) && + (rt->fld.flowidn_iif == cb->iif)) { dst_use(&rt->dst, jiffies); rcu_read_unlock(); skb_dst_set(skb, (struct dst_entry *)rt); @@ -1524,9 +1527,9 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, if (rt->rt_flags & RTCF_NOTIFY) r->rtm_flags |= RTM_F_NOTIFY; RTA_PUT(skb, RTA_DST, 2, &rt->rt_daddr); - if (rt->fl.fld_src) { + if (rt->fld.saddr) { r->rtm_src_len = 16; - RTA_PUT(skb, RTA_SRC, 2, &rt->fl.fld_src); + RTA_PUT(skb, RTA_SRC, 2, &rt->fld.saddr); } if (rt->dst.dev) RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->dst.dev->ifindex); @@ -1545,7 +1548,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, rt->dst.error) < 0) goto rtattr_failure; if (dn_is_input_route(rt)) - RTA_PUT(skb, RTA_IIF, sizeof(int), 
&rt->fl.flowi_iif); + RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fld.flowidn_iif); nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; @@ -1568,13 +1571,13 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void struct dn_skb_cb *cb; int err; struct sk_buff *skb; - struct flowi fl; + struct flowidn fld; if (!net_eq(net, &init_net)) return -EINVAL; - memset(&fl, 0, sizeof(fl)); - fl.flowi_proto = DNPROTO_NSP; + memset(&fld, 0, sizeof(fld)); + fld.flowidn_proto = DNPROTO_NSP; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) @@ -1583,15 +1586,15 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void cb = DN_SKB_CB(skb); if (rta[RTA_SRC-1]) - memcpy(&fl.fld_src, RTA_DATA(rta[RTA_SRC-1]), 2); + memcpy(&fld.saddr, RTA_DATA(rta[RTA_SRC-1]), 2); if (rta[RTA_DST-1]) - memcpy(&fl.fld_dst, RTA_DATA(rta[RTA_DST-1]), 2); + memcpy(&fld.daddr, RTA_DATA(rta[RTA_DST-1]), 2); if (rta[RTA_IIF-1]) - memcpy(&fl.flowi_iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); + memcpy(&fld.flowidn_iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); - if (fl.flowi_iif) { + if (fld.flowidn_iif) { struct net_device *dev; - if ((dev = dev_get_by_index(&init_net, fl.flowi_iif)) == NULL) { + if ((dev = dev_get_by_index(&init_net, fld.flowidn_iif)) == NULL) { kfree_skb(skb); return -ENODEV; } @@ -1602,8 +1605,8 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void } skb->protocol = htons(ETH_P_DNA_RT); skb->dev = dev; - cb->src = fl.fld_src; - cb->dst = fl.fld_dst; + cb->src = fld.saddr; + cb->dst = fld.daddr; local_bh_disable(); err = dn_route_input(skb); local_bh_enable(); @@ -1615,8 +1618,8 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void int oif = 0; if (rta[RTA_OIF - 1]) memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int)); - fl.flowi_oif = oif; - err = dn_route_output_key((struct dst_entry **)&rt, &fl, 0); + fld.flowidn_oif = oif; + err = dn_route_output_key((struct 
dst_entry **)&rt, &fld, 0); } if (skb->dev) diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 6eb91df..f0efb0c 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -49,14 +49,15 @@ struct dn_fib_rule }; -int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res) +int dn_fib_lookup(struct flowidn *flp, struct dn_fib_res *res) { struct fib_lookup_arg arg = { .result = res, }; int err; - err = fib_rules_lookup(dn_fib_rules_ops, flp, 0, &arg); + err = fib_rules_lookup(dn_fib_rules_ops, + flowidn_to_flowi(flp), 0, &arg); res->r = arg.rule; return err; @@ -65,6 +66,7 @@ int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res) static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, struct fib_lookup_arg *arg) { + struct flowidn *fld = &flp->u.dn; int err = -EAGAIN; struct dn_fib_table *tbl; @@ -90,7 +92,7 @@ static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp, if (tbl == NULL) goto errout; - err = tbl->lookup(tbl, flp, (struct dn_fib_res *)arg->result); + err = tbl->lookup(tbl, fld, (struct dn_fib_res *)arg->result); if (err > 0) err = -EAGAIN; errout: @@ -104,8 +106,9 @@ static const struct nla_policy dn_fib_rule_policy[FRA_MAX+1] = { static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) { struct dn_fib_rule *r = (struct dn_fib_rule *)rule; - __le16 daddr = fl->fld_dst; - __le16 saddr = fl->fld_src; + struct flowidn *fld = &fl->u.dn; + __le16 daddr = fld->daddr; + __le16 saddr = fld->saddr; if (((saddr ^ r->src) & r->srcmask) || ((daddr ^ r->dst) & r->dstmask)) @@ -175,7 +178,7 @@ static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, unsigned dnet_addr_type(__le16 addr) { - struct flowi fl = { .fld_dst = addr }; + struct flowidn fld = { .daddr = addr }; struct dn_fib_res res; unsigned ret = RTN_UNICAST; struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0); @@ -183,7 +186,7 @@ unsigned dnet_addr_type(__le16 addr) res.r = NULL; if 
(tb) { - if (!tb->lookup(tb, &fl, &res)) { + if (!tb->lookup(tb, &fld, &res)) { ret = res.type; dn_fib_res_put(&res); } diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index b66600b..99d8d3a 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -764,7 +764,7 @@ static int dn_fib_table_flush(struct dn_fib_table *tb) return found; } -static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowi *flp, struct dn_fib_res *res) +static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowidn *flp, struct dn_fib_res *res) { int err; struct dn_zone *dz; @@ -773,7 +773,7 @@ static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowi *flp, read_lock(&dn_fib_tables_lock); for(dz = t->dh_zone_list; dz; dz = dz->dz_next) { struct dn_fib_node *f; - dn_fib_key_t k = dz_key(flp->fld_dst, dz); + dn_fib_key_t k = dz_key(flp->daddr, dz); for(f = dz_chain(k, dz); f; f = f->fn_next) { if (!dn_key_eq(k, f->fn_key)) { @@ -788,7 +788,7 @@ static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowi *flp, if (f->fn_state&DN_S_ZOMBIE) continue; - if (f->fn_scope < flp->fld_scope) + if (f->fn_scope < flp->flowidn_scope) continue; err = dn_fib_semantic_match(f->fn_type, DN_FIB_INFO(f), flp, res); -- cgit v1.1 From 0232fd0ac475e5f5ec19a69d53467be11bec9833 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Mon, 21 Feb 2011 09:45:40 -0500 Subject: tipc: Allow receiving into iovec containing multiple entries Enhances TIPC's socket receive routines to support iovec structures containing more than a single entry. This change leverages existing sk_buff routines to do most of the work; the only significant change to TIPC itself is that an sk_buff now records how much data has been already consumed as a numeric offset, rather than as a pointer to the first unread data byte.
Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/socket.c | 38 +++++++++++++++----------------------- 1 file changed, 15 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 125dcb0..d45a294 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -289,7 +289,7 @@ static int release(struct socket *sock) if (buf == NULL) break; atomic_dec(&tipc_queue_size); - if (TIPC_SKB_CB(buf)->handle != msg_data(buf_msg(buf))) + if (TIPC_SKB_CB(buf)->handle != 0) buf_discard(buf); else { if ((sock->state == SS_CONNECTING) || @@ -917,9 +917,6 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock, /* Catch invalid receive requests */ - if (m->msg_iovlen != 1) - return -EOPNOTSUPP; /* Don't do multiple iovec entries yet */ - if (unlikely(!buf_len)) return -EINVAL; @@ -991,11 +988,10 @@ restart: sz = buf_len; m->msg_flags |= MSG_TRUNC; } - if (unlikely(copy_to_user(m->msg_iov->iov_base, msg_data(msg), - sz))) { - res = -EFAULT; + res = skb_copy_datagram_iovec(buf, msg_hdr_sz(msg), + m->msg_iov, sz); + if (res) goto exit; - } res = sz; } else { if ((sock->state == SS_READY) || @@ -1041,16 +1037,11 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, unsigned int sz; int sz_to_copy, target, needed; int sz_copied = 0; - char __user *crs = m->msg_iov->iov_base; - unsigned char *buf_crs; u32 err; int res = 0; /* Catch invalid receive attempts */ - if (m->msg_iovlen != 1) - return -EOPNOTSUPP; /* Don't do multiple iovec entries yet */ - if (unlikely(!buf_len)) return -EINVAL; @@ -1112,24 +1103,25 @@ restart: /* Capture message data (if valid) & compute return value (always) */ if (!err) { - buf_crs = (unsigned char *)(TIPC_SKB_CB(buf)->handle); - sz = (unsigned char *)msg + msg_size(msg) - buf_crs; + u32 offset = (u32)(unsigned long)(TIPC_SKB_CB(buf)->handle); + sz -= offset; needed = (buf_len - sz_copied); sz_to_copy = (sz <= needed) ? 
sz : needed; - if (unlikely(copy_to_user(crs, buf_crs, sz_to_copy))) { - res = -EFAULT; + + res = skb_copy_datagram_iovec(buf, msg_hdr_sz(msg) + offset, + m->msg_iov, sz_to_copy); + if (res) goto exit; - } + sz_copied += sz_to_copy; if (sz_to_copy < sz) { if (!(flags & MSG_PEEK)) - TIPC_SKB_CB(buf)->handle = buf_crs + sz_to_copy; + TIPC_SKB_CB(buf)->handle = + (void *)(unsigned long)(offset + sz_to_copy); goto exit; } - - crs += sz_to_copy; } else { if (sz_copied != 0) goto exit; /* can't add error msg to valid data */ @@ -1256,7 +1248,7 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) /* Enqueue message (finally!) */ - TIPC_SKB_CB(buf)->handle = msg_data(msg); + TIPC_SKB_CB(buf)->handle = 0; atomic_inc(&tipc_queue_size); __skb_queue_tail(&sk->sk_receive_queue, buf); @@ -1608,7 +1600,7 @@ restart: buf = __skb_dequeue(&sk->sk_receive_queue); if (buf) { atomic_dec(&tipc_queue_size); - if (TIPC_SKB_CB(buf)->handle != msg_data(buf_msg(buf))) { + if (TIPC_SKB_CB(buf)->handle != 0) { buf_discard(buf); goto restart; } -- cgit v1.1 From aa8472948487432bacbd099b86e313bc16319495 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Mon, 21 Feb 2011 09:45:31 -0500 Subject: tipc: Correct broadcast link peer info when displaying links Fixes a typo in the calculation of the network address of a node's own cluster when generating a response to the configuration command that lists all of the node's links. The correct mask value for a network address uses 1's for the 8-bit zone and 12-bit cluster parts and 0's for the 12-bit node part. 
Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/tipc/node.c b/net/tipc/node.c index e4dba1d..d040d47 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -470,7 +470,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) /* Add TLV for broadcast link */ - link_info.dest = htonl(tipc_own_addr & 0xfffff00); + link_info.dest = htonl(tipc_own_addr & 0xfffff000); link_info.up = htonl(1); strlcpy(link_info.str, tipc_bclink_name, TIPC_MAX_LINK_NAME); tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info)); -- cgit v1.1 From a3796f895ff2917aea331a8d40036c73452b2203 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Wed, 23 Feb 2011 11:44:49 -0500 Subject: tipc: Add network address mask helper routines Introduces a pair of helper routines that convert the network address for a TIPC node into the network address for its cluster or zone. This is a cosmetic change designed to avoid future errors caused by the incorrect use of address bitmasks, and does not alter the existing operation of TIPC. 
Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/addr.c | 4 ++-- net/tipc/addr.h | 17 +++++++++++++---- net/tipc/node.c | 2 +- 3 files changed, 16 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/tipc/addr.c b/net/tipc/addr.c index 88463d9..087e399 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -81,9 +81,9 @@ int tipc_in_scope(u32 domain, u32 addr) { if (!domain || (domain == addr)) return 1; - if (domain == (addr & 0xfffff000u)) /* domain */ + if (domain == tipc_cluster_mask(addr)) /* domain */ return 1; - if (domain == (addr & 0xff000000u)) /* domain */ + if (domain == tipc_zone_mask(addr)) /* domain */ return 1; return 0; } diff --git a/net/tipc/addr.h b/net/tipc/addr.h index 2490fad..8971aba 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -37,6 +37,16 @@ #ifndef _TIPC_ADDR_H #define _TIPC_ADDR_H +static inline u32 tipc_zone_mask(u32 addr) +{ + return addr & 0xff000000u; +} + +static inline u32 tipc_cluster_mask(u32 addr) +{ + return addr & 0xfffff000u; +} + static inline int in_own_cluster(u32 addr) { return !((addr ^ tipc_own_addr) >> 12); @@ -49,14 +59,13 @@ static inline int in_own_cluster(u32 addr) * after a network hop. 
*/ -static inline int addr_domain(int sc) +static inline u32 addr_domain(u32 sc) { if (likely(sc == TIPC_NODE_SCOPE)) return tipc_own_addr; if (sc == TIPC_CLUSTER_SCOPE) - return tipc_addr(tipc_zone(tipc_own_addr), - tipc_cluster(tipc_own_addr), 0); - return tipc_addr(tipc_zone(tipc_own_addr), 0, 0); + return tipc_cluster_mask(tipc_own_addr); + return tipc_zone_mask(tipc_own_addr); } int tipc_addr_domain_valid(u32); diff --git a/net/tipc/node.c b/net/tipc/node.c index d040d47..14f98c8 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -470,7 +470,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) /* Add TLV for broadcast link */ - link_info.dest = htonl(tipc_own_addr & 0xfffff000); + link_info.dest = htonl(tipc_cluster_mask(tipc_own_addr)); link_info.up = htonl(1); strlcpy(link_info.str, tipc_bclink_name, TIPC_MAX_LINK_NAME); tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info)); -- cgit v1.1 From 431697eb60d2d36614096aff12bd1b826a9f9bc1 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Wed, 23 Feb 2011 13:51:15 -0500 Subject: tipc: Prevent null pointer error when removing a node subscription Prevents a null pointer dereference from occurring if a node subscription is triggered at the same time that the subscribing port or publication is terminating the subscription. The problem arises if the triggering routine asynchronously activates and deregisters the node subscription while deregistration is already underway -- the deregistration routine may find that the pointer it has just verified to be non-NULL is now NULL. To avoid this race condition the triggering routine now simply marks the node subscription as defunct (to prevent it from re-activating) instead of deregistering it. The subscription is now both deregistered and destroyed only when the subscribing port or publication code terminates the node subscription. 
Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/name_distr.c | 5 +++-- net/tipc/node.c | 13 +++++++------ 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 483c226..1d4a18a 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -2,7 +2,7 @@ * net/tipc/name_distr.c: TIPC name distribution code * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -221,7 +221,6 @@ exit: * In rare cases the link may have come back up again when this * function is called, and we have two items representing the same * publication. Nudge this item's key to distinguish it from the other. - * (Note: Publication's node subscription is already unsubscribed.) */ static void node_is_down(struct publication *publ) @@ -232,6 +231,8 @@ static void node_is_down(struct publication *publ) publ->key += 1222345; p = tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node, publ->ref, publ->key); + if (p) + tipc_nodesub_unsubscribe(&p->subscr); write_unlock_bh(&tipc_nametbl_lock); if (p != publ) { diff --git a/net/tipc/node.c b/net/tipc/node.c index 14f98c8..8926caa 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -327,7 +327,7 @@ static void node_cleanup_finished(unsigned long node_addr) static void node_lost_contact(struct tipc_node *n_ptr) { - struct tipc_node_subscr *ns, *tns; + struct tipc_node_subscr *ns; char addr_string[16]; u32 i; @@ -365,11 +365,12 @@ static void node_lost_contact(struct tipc_node *n_ptr) } /* Notify subscribers */ - list_for_each_entry_safe(ns, tns, &n_ptr->nsub, nodesub_list) { - ns->node = NULL; - list_del_init(&ns->nodesub_list); - tipc_k_signal((Handler)ns->handle_node_down, - (unsigned long)ns->usr_handle); + list_for_each_entry(ns, &n_ptr->nsub, nodesub_list) { + 
if (ns->handle_node_down) { + tipc_k_signal((Handler)ns->handle_node_down, + (unsigned long)ns->usr_handle); + ns->handle_node_down = NULL; + } } /* Prevent re-contact with node until all cleanup is done */ -- cgit v1.1 From f1379173326de4c745c4f610501486e4f3bd9248 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Wed, 23 Feb 2011 14:13:41 -0500 Subject: tipc: Cosmetic changes to node subscription code Relocates the code that notifies users of node subscriptions so that it is adjacent to the rest of the routines that implement TIPC's node subscription capability. Renames the name table routine that is invoked by a node subscription to better reflect its purpose and to be consistent with other, similar name table routines. These changes are cosmetic in nature, and do not alter the behavior of TIPC. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/name_distr.c | 7 ++++--- net/tipc/node.c | 9 +-------- net/tipc/node_subscr.c | 21 ++++++++++++++++++++- net/tipc/node_subscr.h | 3 ++- 4 files changed, 27 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 1d4a18a..d58dae7 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -214,7 +214,7 @@ exit: } /** - * node_is_down - remove publication associated with a failed node + * named_purge_publ - remove publication associated with a failed node * * Invoked for each publication issued by a newly failed node. * Removes publication structure from name table & deletes it. @@ -223,7 +223,7 @@ exit: * publication. Nudge this item's key to distinguish it from the other. 
*/ -static void node_is_down(struct publication *publ) +static void named_purge_publ(struct publication *publ) { struct publication *p; @@ -269,7 +269,8 @@ void tipc_named_recv(struct sk_buff *buf) tipc_nodesub_subscribe(&publ->subscr, msg_orignode(msg), publ, - (net_ev_handler)node_is_down); + (net_ev_handler) + named_purge_publ); } } else if (msg_type(msg) == WITHDRAWAL) { publ = tipc_nametbl_remove_publ(ntohl(item->type), diff --git a/net/tipc/node.c b/net/tipc/node.c index 8926caa..713ab5d 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -327,7 +327,6 @@ static void node_cleanup_finished(unsigned long node_addr) static void node_lost_contact(struct tipc_node *n_ptr) { - struct tipc_node_subscr *ns; char addr_string[16]; u32 i; @@ -365,13 +364,7 @@ static void node_lost_contact(struct tipc_node *n_ptr) } /* Notify subscribers */ - list_for_each_entry(ns, &n_ptr->nsub, nodesub_list) { - if (ns->handle_node_down) { - tipc_k_signal((Handler)ns->handle_node_down, - (unsigned long)ns->usr_handle); - ns->handle_node_down = NULL; - } - } + tipc_nodesub_notify(n_ptr); /* Prevent re-contact with node until all cleanup is done */ diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c index 018a553..c3c2815 100644 --- a/net/tipc/node_subscr.c +++ b/net/tipc/node_subscr.c @@ -2,7 +2,7 @@ * net/tipc/node_subscr.c: TIPC "node down" subscription handling * * Copyright (c) 1995-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -76,3 +76,22 @@ void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub) list_del_init(&node_sub->nodesub_list); tipc_node_unlock(node_sub->node); } + +/** + * tipc_nodesub_notify - notify subscribers that a node is unreachable + * + * Note: node is locked by caller + */ + +void tipc_nodesub_notify(struct tipc_node *node) +{ + struct tipc_node_subscr *ns; + + list_for_each_entry(ns, &node->nsub, nodesub_list) { + if (ns->handle_node_down) { + tipc_k_signal((Handler)ns->handle_node_down, + (unsigned long)ns->usr_handle); + ns->handle_node_down = NULL; + } + } +} diff --git a/net/tipc/node_subscr.h b/net/tipc/node_subscr.h index 006ed73..4bc2ca0 100644 --- a/net/tipc/node_subscr.h +++ b/net/tipc/node_subscr.h @@ -2,7 +2,7 @@ * net/tipc/node_subscr.h: Include file for TIPC "node down" subscription handling * * Copyright (c) 1995-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -59,5 +59,6 @@ struct tipc_node_subscr { void tipc_nodesub_subscribe(struct tipc_node_subscr *node_sub, u32 addr, void *usr_handle, net_ev_handler handle_down); void tipc_nodesub_unsubscribe(struct tipc_node_subscr *node_sub); +void tipc_nodesub_notify(struct tipc_node *node); #endif -- cgit v1.1 From 71092ea122062012f8e4b7fb2f9a747212d1479c Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Wed, 23 Feb 2011 14:52:14 -0500 Subject: tipc: Add support for SO_RCVTIMEO socket option Adds support for the SO_RCVTIMEO socket option to TIPC's socket receive routines. Thanks go out to Raj Hegde for his contribution to the development and testing of this enhancement.
Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/socket.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index d45a294..29d94d5 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -58,6 +58,9 @@ struct tipc_sock { #define tipc_sk(sk) ((struct tipc_sock *)(sk)) #define tipc_sk_port(sk) ((struct tipc_port *)(tipc_sk(sk)->p)) +#define tipc_rx_ready(sock) (!skb_queue_empty(&sock->sk->sk_receive_queue) || \ + (sock->state == SS_DISCONNECTING)) + static int backlog_rcv(struct sock *sk, struct sk_buff *skb); static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf); static void wakeupdispatch(struct tipc_port *tport); @@ -911,6 +914,7 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock, struct tipc_port *tport = tipc_sk_port(sk); struct sk_buff *buf; struct tipc_msg *msg; + long timeout; unsigned int sz; u32 err; int res; @@ -927,6 +931,7 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock, goto exit; } + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); restart: /* Look for a message in receive queue; wait if necessary */ @@ -936,17 +941,15 @@ restart: res = -ENOTCONN; goto exit; } - if (flags & MSG_DONTWAIT) { - res = -EWOULDBLOCK; + if (timeout <= 0L) { + res = timeout ? 
timeout : -EWOULDBLOCK; goto exit; } release_sock(sk); - res = wait_event_interruptible(*sk_sleep(sk), - (!skb_queue_empty(&sk->sk_receive_queue) || - (sock->state == SS_DISCONNECTING))); + timeout = wait_event_interruptible_timeout(*sk_sleep(sk), + tipc_rx_ready(sock), + timeout); lock_sock(sk); - if (res) - goto exit; } /* Look at first message in receive queue */ @@ -1034,6 +1037,7 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, struct tipc_port *tport = tipc_sk_port(sk); struct sk_buff *buf; struct tipc_msg *msg; + long timeout; unsigned int sz; int sz_to_copy, target, needed; int sz_copied = 0; @@ -1054,7 +1058,7 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, } target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len); - + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); restart: /* Look for a message in receive queue; wait if necessary */ @@ -1064,17 +1068,15 @@ restart: res = -ENOTCONN; goto exit; } - if (flags & MSG_DONTWAIT) { - res = -EWOULDBLOCK; + if (timeout <= 0L) { + res = timeout ? timeout : -EWOULDBLOCK; goto exit; } release_sock(sk); - res = wait_event_interruptible(*sk_sleep(sk), - (!skb_queue_empty(&sk->sk_receive_queue) || - (sock->state == SS_DISCONNECTING))); + timeout = wait_event_interruptible_timeout(*sk_sleep(sk), + tipc_rx_ready(sock), + timeout); lock_sock(sk); - if (res) - goto exit; } /* Look at first message in receive queue */ -- cgit v1.1 From 9df3b7eb6ec1c7734482f782bf8335a2737c02f0 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Thu, 24 Feb 2011 13:20:20 -0500 Subject: tipc: Fix problem with missing link in "tipc-config -l" output Removes a race condition that could cause TIPC's internal counter of the number of links it has to neighboring nodes to have the incorrect value if two independent threads of control simultaneously create new link endpoints connecting to two different nodes using two different bearers. 
Such under counting would result in TIPC failing to list the final link(s) in its response to a configuration request to list all of the node's links. The counter is now updated atomically to ensure that simultaneous increments do not interfere with each other. Thanks go to Peter Butler for his assistance in diagnosing and fixing this problem. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/net.c | 3 ++- net/tipc/net.h | 4 ++-- net/tipc/node.c | 7 ++++--- 3 files changed, 8 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/tipc/net.c b/net/tipc/net.c index 9bacfd0..dd78d86 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -2,7 +2,7 @@ * net/tipc/net.c: TIPC network routing code * * Copyright (c) 1995-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -115,6 +115,7 @@ static int net_start(void) tipc_net.nodes = kcalloc(tipc_max_nodes + 1, sizeof(*tipc_net.nodes), GFP_ATOMIC); tipc_net.highest_node = 0; + atomic_set(&tipc_net.links, 0); return tipc_net.nodes ? 0 : -ENOMEM; } diff --git a/net/tipc/net.h b/net/tipc/net.h index 4ae59ad..aa431ef 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -2,7 +2,7 @@ * net/tipc/net.h: Include file for TIPC network routing code * * Copyright (c) 1995-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -49,7 +49,7 @@ struct tipc_node; struct network { struct tipc_node **nodes; u32 highest_node; - u32 links; + atomic_t links; }; diff --git a/net/tipc/node.c b/net/tipc/node.c index 713ab5d..a24fad3 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -233,7 +233,7 @@ struct tipc_node *tipc_node_attach_link(struct link *l_ptr) if (!n_ptr->links[bearer_id]) { n_ptr->links[bearer_id] = l_ptr; - tipc_net.links++; + atomic_inc(&tipc_net.links); n_ptr->link_cnt++; return n_ptr; } @@ -247,7 +247,7 @@ struct tipc_node *tipc_node_attach_link(struct link *l_ptr) void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr) { n_ptr->links[l_ptr->b_ptr->identity] = NULL; - tipc_net.links--; + atomic_dec(&tipc_net.links); n_ptr->link_cnt--; } @@ -450,7 +450,8 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) /* Get space for all unicast links + multicast link */ - payload_size = TLV_SPACE(sizeof(link_info)) * (tipc_net.links + 1); + payload_size = TLV_SPACE(sizeof(link_info)) * + (atomic_read(&tipc_net.links) + 1); if (payload_size > 32768u) { read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED -- cgit v1.1 From d1bcb11544109114d72965afea7805cc3e16a83a Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Fri, 25 Feb 2011 10:01:58 -0500 Subject: tipc: Split up unified structure of network-related variables Converts the fields of the global "tipc_net" structure into individual variables. Since the struct was never referenced as a complete unit, its existence was pointless. This will facilitate upcoming changes to TIPC's node table and simplify upcoming relocation of the variables so they are only visible to the files that actually use them. This change is essentially cosmetic in nature, and doesn't affect the operation of TIPC.
Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/name_distr.c | 4 ++-- net/tipc/net.c | 22 ++++++++++++---------- net/tipc/net.h | 17 +++-------------- net/tipc/node.c | 30 +++++++++++++++--------------- net/tipc/node.h | 2 +- 5 files changed, 33 insertions(+), 42 deletions(-) (limited to 'net') diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index d58dae7..f2086f6 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -111,8 +111,8 @@ static void named_cluster_distribute(struct sk_buff *buf) struct tipc_node *n_ptr; u32 n_num; - for (n_num = 1; n_num <= tipc_net.highest_node; n_num++) { - n_ptr = tipc_net.nodes[n_num]; + for (n_num = 1; n_num <= tipc_highest_node; n_num++) { + n_ptr = tipc_nodes[n_num]; if (n_ptr && tipc_node_has_active_links(n_ptr)) { buf_copy = skb_copy(buf, GFP_ATOMIC); if (!buf_copy) diff --git a/net/tipc/net.c b/net/tipc/net.c index dd78d86..f6303d7 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -108,26 +108,28 @@ */ DEFINE_RWLOCK(tipc_net_lock); -struct network tipc_net; +struct tipc_node **tipc_nodes; +u32 tipc_highest_node; +atomic_t tipc_num_links; static int net_start(void) { - tipc_net.nodes = kcalloc(tipc_max_nodes + 1, - sizeof(*tipc_net.nodes), GFP_ATOMIC); - tipc_net.highest_node = 0; - atomic_set(&tipc_net.links, 0); + tipc_nodes = kcalloc(tipc_max_nodes + 1, + sizeof(*tipc_nodes), GFP_ATOMIC); + tipc_highest_node = 0; + atomic_set(&tipc_num_links, 0); - return tipc_net.nodes ? 0 : -ENOMEM; + return tipc_nodes ? 
0 : -ENOMEM; } static void net_stop(void) { u32 n_num; - for (n_num = 1; n_num <= tipc_net.highest_node; n_num++) - tipc_node_delete(tipc_net.nodes[n_num]); - kfree(tipc_net.nodes); - tipc_net.nodes = NULL; + for (n_num = 1; n_num <= tipc_highest_node; n_num++) + tipc_node_delete(tipc_nodes[n_num]); + kfree(tipc_nodes); + tipc_nodes = NULL; } static void net_route_named_msg(struct sk_buff *buf) diff --git a/net/tipc/net.h b/net/tipc/net.h index aa431ef..b52b974 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -39,21 +39,10 @@ struct tipc_node; -/** - * struct network - TIPC network structure - * @nodes: array of pointers to all nodes within cluster - * @highest_node: id of highest numbered node within cluster - * @links: number of (unicast) links to cluster - */ - -struct network { - struct tipc_node **nodes; - u32 highest_node; - atomic_t links; -}; - +extern struct tipc_node **tipc_nodes; +extern u32 tipc_highest_node; +extern atomic_t tipc_num_links; -extern struct network tipc_net; extern rwlock_t tipc_net_lock; void tipc_net_route_msg(struct sk_buff *buf); diff --git a/net/tipc/node.c b/net/tipc/node.c index a24fad3..64976f2 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -81,9 +81,9 @@ struct tipc_node *tipc_node_create(u32 addr) INIT_LIST_HEAD(&n_ptr->nsub); n_num = tipc_node(addr); - tipc_net.nodes[n_num] = n_ptr; - if (n_num > tipc_net.highest_node) - tipc_net.highest_node = n_num; + tipc_nodes[n_num] = n_ptr; + if (n_num > tipc_highest_node) + tipc_highest_node = n_num; spin_unlock_bh(&node_create_lock); return n_ptr; @@ -97,11 +97,11 @@ void tipc_node_delete(struct tipc_node *n_ptr) return; n_num = tipc_node(n_ptr->addr); - tipc_net.nodes[n_num] = NULL; + tipc_nodes[n_num] = NULL; kfree(n_ptr); - while (!tipc_net.nodes[tipc_net.highest_node]) - if (--tipc_net.highest_node == 0) + while (!tipc_nodes[tipc_highest_node]) + if (--tipc_highest_node == 0) break; } @@ -233,7 +233,7 @@ struct tipc_node *tipc_node_attach_link(struct link *l_ptr) if 
(!n_ptr->links[bearer_id]) { n_ptr->links[bearer_id] = l_ptr; - atomic_inc(&tipc_net.links); + atomic_inc(&tipc_num_links); n_ptr->link_cnt++; return n_ptr; } @@ -247,7 +247,7 @@ struct tipc_node *tipc_node_attach_link(struct link *l_ptr) void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr) { n_ptr->links[l_ptr->b_ptr->identity] = NULL; - atomic_dec(&tipc_net.links); + atomic_dec(&tipc_num_links); n_ptr->link_cnt--; } @@ -390,7 +390,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) " (network address)"); read_lock_bh(&tipc_net_lock); - if (!tipc_net.nodes) { + if (!tipc_nodes) { read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_none(); } @@ -398,7 +398,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) /* For now, get space for all other nodes */ payload_size = TLV_SPACE(sizeof(node_info)) * - (tipc_net.highest_node - 1); + (tipc_highest_node - 1); if (payload_size > 32768u) { read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED @@ -412,8 +412,8 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) /* Add TLVs for all nodes in scope */ - for (n_num = 1; n_num <= tipc_net.highest_node; n_num++) { - n_ptr = tipc_net.nodes[n_num]; + for (n_num = 1; n_num <= tipc_highest_node; n_num++) { + n_ptr = tipc_nodes[n_num]; if (!n_ptr || !tipc_in_scope(domain, n_ptr->addr)) continue; node_info.addr = htonl(n_ptr->addr); @@ -451,7 +451,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) /* Get space for all unicast links + multicast link */ payload_size = TLV_SPACE(sizeof(link_info)) * - (atomic_read(&tipc_net.links) + 1); + (atomic_read(&tipc_num_links) + 1); if (payload_size > 32768u) { read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED @@ -472,10 +472,10 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) /* Add 
TLVs for any other links in scope */ - for (n_num = 1; n_num <= tipc_net.highest_node; n_num++) { + for (n_num = 1; n_num <= tipc_highest_node; n_num++) { u32 i; - n_ptr = tipc_net.nodes[n_num]; + n_ptr = tipc_nodes[n_num]; if (!n_ptr || !tipc_in_scope(domain, n_ptr->addr)) continue; tipc_node_lock(n_ptr); diff --git a/net/tipc/node.h b/net/tipc/node.h index 206a8ef..c510a2a 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -107,7 +107,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) static inline struct tipc_node *tipc_node_find(u32 addr) { if (likely(in_own_cluster(addr))) - return tipc_net.nodes[tipc_node(addr)]; + return tipc_nodes[tipc_node(addr)]; return NULL; } -- cgit v1.1 From f831c963b5c20bec230edce89e25f369996be5db Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Fri, 25 Feb 2011 14:22:11 -0500 Subject: tipc: Eliminate configuration for maximum number of cluster nodes Gets rid of the need for users to specify the maximum number of cluster nodes supported by TIPC. TIPC now automatically provides support for all 4K nodes allowed by its addressing scheme. Note: This change sets TIPC's memory usage to the amount used by a maximum size node table with 4K entries. An upcoming patch that converts the node table from a linear array to a hash table will compact the node table to a more efficient design, but for clarity it is nice to have all the Kconfig infrastructure go away separately.
Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/Kconfig | 12 ------------ net/tipc/addr.c | 11 +++-------- net/tipc/config.c | 29 +++-------------------------- net/tipc/core.c | 6 ------ net/tipc/core.h | 1 - net/tipc/net.c | 3 +-- 6 files changed, 7 insertions(+), 55 deletions(-) (limited to 'net') diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index 0436927..2c5954b 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -29,18 +29,6 @@ config TIPC_ADVANCED Saying Y here will open some advanced configuration for TIPC. Most users do not need to bother; if unsure, just say N. -config TIPC_NODES - int "Maximum number of nodes in a cluster" - depends on TIPC_ADVANCED - range 8 2047 - default "255" - help - Specifies how many nodes can be supported in a TIPC cluster. - Can range from 8 to 2047 nodes; default is 255. - - Setting this to a smaller value saves some memory; - setting it to higher allows for more nodes. - config TIPC_PORTS int "Maximum number of ports in a node" depends on TIPC_ADVANCED diff --git a/net/tipc/addr.c b/net/tipc/addr.c index 087e399..a6fdab3 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -2,7 +2,7 @@ * net/tipc/addr.c: TIPC address utility routines * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2004-2005, Wind River Systems + * Copyright (c) 2004-2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -41,7 +41,7 @@ * tipc_addr_domain_valid - validates a network domain address * * Accepts <Z.C.N>, <Z.C.0>, <Z.0.0>, and <0.0.0>, - * where Z, C, and N are non-zero and do not exceed the configured limits. + * where Z, C, and N are non-zero.
* * Returns 1 if domain address is valid, otherwise 0 */ @@ -51,10 +51,6 @@ int tipc_addr_domain_valid(u32 addr) u32 n = tipc_node(addr); u32 c = tipc_cluster(addr); u32 z = tipc_zone(addr); - u32 max_nodes = tipc_max_nodes; - - if (n > max_nodes) - return 0; if (n && (!z || !c)) return 0; @@ -66,8 +62,7 @@ int tipc_addr_domain_valid(u32 addr) /** * tipc_addr_node_valid - validates a proposed network address for this node * - * Accepts <Z.C.N>, where Z, C, and N are non-zero and do not exceed - * the configured limits. + * Accepts <Z.C.N>, where Z, C, and N are non-zero. * * Returns 1 if address can be used, otherwise 0 */ diff --git a/net/tipc/config.c b/net/tipc/config.c index e16750d..fa3d508 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -2,7 +2,7 @@ * net/tipc/config.c: TIPC configuration management code * * Copyright (c) 2002-2006, Ericsson AB - * Copyright (c) 2004-2007, Wind River Systems + * Copyright (c) 2004-2007, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -260,25 +260,6 @@ static struct sk_buff *cfg_set_max_ports(void) return tipc_cfg_reply_none(); } -static struct sk_buff *cfg_set_max_nodes(void) -{ - u32 value; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area)); - if (value == tipc_max_nodes) - return tipc_cfg_reply_none(); - if (value != delimit(value, 8, 2047)) - return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE - " (max nodes must be 8-2047)"); - if (tipc_mode == TIPC_NET_MODE) - return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED - " (cannot change max nodes once TIPC has joined a network)"); - tipc_max_nodes = value; - return tipc_cfg_reply_none(); -} - static struct sk_buff *cfg_set_netid(void) { u32 value; @@ -397,9 +378,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case
TIPC_CMD_SET_MAX_SUBSCR: rep_tlv_buf = cfg_set_max_subscriptions(); break; - case TIPC_CMD_SET_MAX_NODES: - rep_tlv_buf = cfg_set_max_nodes(); - break; case TIPC_CMD_SET_NETID: rep_tlv_buf = cfg_set_netid(); break; @@ -415,9 +393,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_GET_MAX_SUBSCR: rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_subscriptions); break; - case TIPC_CMD_GET_MAX_NODES: - rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_nodes); - break; case TIPC_CMD_GET_NETID: rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id); break; @@ -431,6 +406,8 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_GET_MAX_SLAVES: case TIPC_CMD_SET_MAX_CLUSTERS: case TIPC_CMD_GET_MAX_CLUSTERS: + case TIPC_CMD_SET_MAX_NODES: + case TIPC_CMD_GET_MAX_NODES: rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (obsolete command)"); break; diff --git a/net/tipc/core.c b/net/tipc/core.c index 2da1fc7..c9a73e7 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -41,10 +41,6 @@ #include "config.h" -#ifndef CONFIG_TIPC_NODES -#define CONFIG_TIPC_NODES 255 -#endif - #ifndef CONFIG_TIPC_PORTS #define CONFIG_TIPC_PORTS 8191 #endif @@ -64,7 +60,6 @@ const char tipc_alphabet[] = /* configurable TIPC parameters */ u32 tipc_own_addr; -int tipc_max_nodes; int tipc_max_ports; int tipc_max_subscriptions; int tipc_max_publications; @@ -192,7 +187,6 @@ static int __init tipc_init(void) tipc_max_publications = 10000; tipc_max_subscriptions = 2000; tipc_max_ports = CONFIG_TIPC_PORTS; - tipc_max_nodes = CONFIG_TIPC_NODES; tipc_net_id = 4711; res = tipc_core_start(); diff --git a/net/tipc/core.h b/net/tipc/core.h index 37544d9..436dda1 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -147,7 +147,6 @@ void tipc_msg_dbg(struct print_buf *, struct tipc_msg *, const char *); */ extern u32 tipc_own_addr; -extern int tipc_max_nodes; extern int tipc_max_ports; extern int 
tipc_max_subscriptions; extern int tipc_max_publications; diff --git a/net/tipc/net.c b/net/tipc/net.c index f6303d7..b5b337f 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -114,8 +114,7 @@ atomic_t tipc_num_links; static int net_start(void) { - tipc_nodes = kcalloc(tipc_max_nodes + 1, - sizeof(*tipc_nodes), GFP_ATOMIC); + tipc_nodes = kcalloc(4096, sizeof(*tipc_nodes), GFP_ATOMIC); tipc_highest_node = 0; atomic_set(&tipc_num_links, 0); -- cgit v1.1 From 672d99e19a12b703c9e2d71ead8fb8b8a85a3886 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Fri, 25 Feb 2011 18:42:52 -0500 Subject: tipc: Convert node object array to a hash table Replaces the dynamically allocated array of pointers to the cluster's node objects with a static hash table. Hash collisions are resolved using chaining, with a typical hash chain having only a single node, to avoid degrading performance during processing of incoming packets. The conversion to a hash table reduces the memory requirements for TIPC's node table to approximately the same size it had prior to the previous commit. In addition to the hash table itself, TIPC now also maintains a linked list for the node objects, sorted by ascending network address. This list allows TIPC to continue sending responses to user space applications that request node and link information in sorted order. The list also improves performance when name table update messages are sent by making it easier to identify the nodes that must be notified. 
Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/name_distr.c | 6 ++--- net/tipc/net.c | 15 ++++------- net/tipc/net.h | 4 --- net/tipc/node.c | 70 +++++++++++++++++++++++++++++++-------------------- net/tipc/node.h | 30 ++++++++++++++-------- 5 files changed, 70 insertions(+), 55 deletions(-) (limited to 'net') diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index f2086f6..1b70d5d 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -109,11 +109,9 @@ static void named_cluster_distribute(struct sk_buff *buf) { struct sk_buff *buf_copy; struct tipc_node *n_ptr; - u32 n_num; - for (n_num = 1; n_num <= tipc_highest_node; n_num++) { - n_ptr = tipc_nodes[n_num]; - if (n_ptr && tipc_node_has_active_links(n_ptr)) { + list_for_each_entry(n_ptr, &tipc_node_list, list) { + if (tipc_node_has_active_links(n_ptr)) { buf_copy = skb_copy(buf, GFP_ATOMIC); if (!buf_copy) break; diff --git a/net/tipc/net.c b/net/tipc/net.c index b5b337f..cce8d08 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -39,6 +39,7 @@ #include "name_distr.h" #include "subscr.h" #include "port.h" +#include "node.h" #include "config.h" /* @@ -108,27 +109,21 @@ */ DEFINE_RWLOCK(tipc_net_lock); -struct tipc_node **tipc_nodes; -u32 tipc_highest_node; atomic_t tipc_num_links; static int net_start(void) { - tipc_nodes = kcalloc(4096, sizeof(*tipc_nodes), GFP_ATOMIC); - tipc_highest_node = 0; atomic_set(&tipc_num_links, 0); - return tipc_nodes ? 
0 : -ENOMEM; + return 0; } static void net_stop(void) { - u32 n_num; + struct tipc_node *node, *t_node; - for (n_num = 1; n_num <= tipc_highest_node; n_num++) - tipc_node_delete(tipc_nodes[n_num]); - kfree(tipc_nodes); - tipc_nodes = NULL; + list_for_each_entry_safe(node, t_node, &tipc_node_list, list) + tipc_node_delete(node); } static void net_route_named_msg(struct sk_buff *buf) diff --git a/net/tipc/net.h b/net/tipc/net.h index b52b974..0ba6093 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -37,10 +37,6 @@ #ifndef _TIPC_NET_H #define _TIPC_NET_H -struct tipc_node; - -extern struct tipc_node **tipc_nodes; -extern u32 tipc_highest_node; extern atomic_t tipc_num_links; extern rwlock_t tipc_net_lock; diff --git a/net/tipc/node.c b/net/tipc/node.c index 64976f2..22aeb2b 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -44,9 +44,31 @@ static void node_established_contact(struct tipc_node *n_ptr); static DEFINE_SPINLOCK(node_create_lock); +static struct hlist_head node_htable[NODE_HTABLE_SIZE]; +LIST_HEAD(tipc_node_list); +static u32 tipc_num_nodes; u32 tipc_own_tag; /** + * tipc_node_find - locate specified node object, if it exists + */ + +struct tipc_node *tipc_node_find(u32 addr) +{ + struct tipc_node *node; + struct hlist_node *pos; + + if (unlikely(!in_own_cluster(addr))) + return NULL; + + hlist_for_each_entry(node, pos, &node_htable[tipc_hashfn(addr)], hash) { + if (node->addr == addr) + return node; + } + return NULL; +} + +/** * tipc_node_create - create neighboring node * * Currently, this routine is called by neighbor discovery code, which holds @@ -58,8 +80,7 @@ u32 tipc_own_tag; struct tipc_node *tipc_node_create(u32 addr) { - struct tipc_node *n_ptr; - u32 n_num; + struct tipc_node *n_ptr, *temp_node; spin_lock_bh(&node_create_lock); @@ -78,12 +99,19 @@ struct tipc_node *tipc_node_create(u32 addr) n_ptr->addr = addr; spin_lock_init(&n_ptr->lock); + INIT_HLIST_NODE(&n_ptr->hash); + INIT_LIST_HEAD(&n_ptr->list); INIT_LIST_HEAD(&n_ptr->nsub); - 
n_num = tipc_node(addr); - tipc_nodes[n_num] = n_ptr; - if (n_num > tipc_highest_node) - tipc_highest_node = n_num; + hlist_add_head(&n_ptr->hash, &node_htable[tipc_hashfn(addr)]); + + list_for_each_entry(temp_node, &tipc_node_list, list) { + if (n_ptr->addr < temp_node->addr) + break; + } + list_add_tail(&n_ptr->list, &temp_node->list); + + tipc_num_nodes++; spin_unlock_bh(&node_create_lock); return n_ptr; @@ -91,18 +119,11 @@ struct tipc_node *tipc_node_create(u32 addr) void tipc_node_delete(struct tipc_node *n_ptr) { - u32 n_num; - - if (!n_ptr) - return; - - n_num = tipc_node(n_ptr->addr); - tipc_nodes[n_num] = NULL; + list_del(&n_ptr->list); + hlist_del(&n_ptr->hash); kfree(n_ptr); - while (!tipc_nodes[tipc_highest_node]) - if (--tipc_highest_node == 0) - break; + tipc_num_nodes--; } @@ -379,7 +400,6 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) struct tipc_node *n_ptr; struct tipc_node_info node_info; u32 payload_size; - u32 n_num; if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); @@ -390,15 +410,14 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) " (network address)"); read_lock_bh(&tipc_net_lock); - if (!tipc_nodes) { + if (!tipc_num_nodes) { read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_none(); } /* For now, get space for all other nodes */ - payload_size = TLV_SPACE(sizeof(node_info)) * - (tipc_highest_node - 1); + payload_size = TLV_SPACE(sizeof(node_info)) * tipc_num_nodes; if (payload_size > 32768u) { read_unlock_bh(&tipc_net_lock); return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED @@ -412,9 +431,8 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) /* Add TLVs for all nodes in scope */ - for (n_num = 1; n_num <= tipc_highest_node; n_num++) { - n_ptr = tipc_nodes[n_num]; - if (!n_ptr || !tipc_in_scope(domain, n_ptr->addr)) + list_for_each_entry(n_ptr, 
&tipc_node_list, list) { + if (!tipc_in_scope(domain, n_ptr->addr)) continue; node_info.addr = htonl(n_ptr->addr); node_info.up = htonl(tipc_node_is_up(n_ptr)); @@ -433,7 +451,6 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) struct tipc_node *n_ptr; struct tipc_link_info link_info; u32 payload_size; - u32 n_num; if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_NET_ADDR)) return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); @@ -472,11 +489,10 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) /* Add TLVs for any other links in scope */ - for (n_num = 1; n_num <= tipc_highest_node; n_num++) { + list_for_each_entry(n_ptr, &tipc_node_list, list) { u32 i; - n_ptr = tipc_nodes[n_num]; - if (!n_ptr || !tipc_in_scope(domain, n_ptr->addr)) + if (!tipc_in_scope(domain, n_ptr->addr)) continue; tipc_node_lock(n_ptr); for (i = 0; i < MAX_BEARERS; i++) { diff --git a/net/tipc/node.h b/net/tipc/node.h index c510a2a..02e4927 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -2,7 +2,7 @@ * net/tipc/node.h: Include file for TIPC node management routines * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -46,7 +46,8 @@ * struct tipc_node - TIPC node structure * @addr: network address of node * @lock: spinlock governing access to structure - * @next: pointer to next node in sorted list of cluster's nodes + * @hash: links to adjacent nodes in unsorted hash chain + * @list: links to adjacent nodes in sorted list of cluster's nodes * @nsub: list of "node down" subscriptions monitoring node * @active_links: pointers to active links to node * @links: pointers to all links to node @@ -69,7 +70,8 @@ struct tipc_node { u32 addr; spinlock_t lock; - struct tipc_node *next; + struct hlist_node hash; + struct list_head list; struct list_head nsub; struct link *active_links[2]; struct link *links[MAX_BEARERS]; @@ -90,8 +92,23 @@ struct tipc_node { } bclink; }; +#define NODE_HTABLE_SIZE 512 +extern struct list_head tipc_node_list; + +/* + * A trivial power-of-two bitmask technique is used for speed, since this + * operation is done for every incoming TIPC packet. The number of hash table + * entries has been chosen so that no hash chain exceeds 8 nodes and will + * usually be much smaller (typically only a single node). 
+ */ +static inline unsigned int tipc_hashfn(u32 addr) +{ + return addr & (NODE_HTABLE_SIZE - 1); +} + extern u32 tipc_own_tag; +struct tipc_node *tipc_node_find(u32 addr); struct tipc_node *tipc_node_create(u32 addr); void tipc_node_delete(struct tipc_node *n_ptr); struct tipc_node *tipc_node_attach_link(struct link *l_ptr); @@ -104,13 +121,6 @@ int tipc_node_is_up(struct tipc_node *n_ptr); struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space); struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space); -static inline struct tipc_node *tipc_node_find(u32 addr) -{ - if (likely(in_own_cluster(addr))) - return tipc_nodes[tipc_node(addr)]; - return NULL; -} - static inline void tipc_node_lock(struct tipc_node *n_ptr) { spin_lock_bh(&n_ptr->lock); -- cgit v1.1 From 34e46258cb9f53b41e8ffd2e9acd58e0cf64b158 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Fri, 25 Feb 2011 19:11:25 -0500 Subject: tipc: manually inline net_start/stop, make assoc. vars static Relocates network-related variables into the subsystem files where they are now primarily used (following the recent rework of TIPC's node table), and converts globals into locals where possible. Changes the initialization of tipc_num_links from run-time to compile-time, and eliminates the net_start routine that becomes empty as a result. Also eliminates the corresponding net_stop routine by moving its (trivial) content into the one location that called the routine. 
Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/net.c | 25 ++++--------------------- net/tipc/net.h | 2 -- net/tipc/node.c | 2 ++ 3 files changed, 6 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/tipc/net.c b/net/tipc/net.c index cce8d08..8fbc7e6 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -109,22 +109,6 @@ */ DEFINE_RWLOCK(tipc_net_lock); -atomic_t tipc_num_links; - -static int net_start(void) -{ - atomic_set(&tipc_num_links, 0); - - return 0; -} - -static void net_stop(void) -{ - struct tipc_node *node, *t_node; - - list_for_each_entry_safe(node, t_node, &tipc_node_list, list) - tipc_node_delete(node); -} static void net_route_named_msg(struct sk_buff *buf) { @@ -214,9 +198,6 @@ int tipc_net_start(u32 addr) tipc_named_reinit(); tipc_port_reinit(); - res = net_start(); - if (res) - return res; res = tipc_bclink_init(); if (res) return res; @@ -232,14 +213,16 @@ int tipc_net_start(u32 addr) void tipc_net_stop(void) { + struct tipc_node *node, *t_node; + if (tipc_mode != TIPC_NET_MODE) return; write_lock_bh(&tipc_net_lock); tipc_bearer_stop(); tipc_mode = TIPC_NODE_MODE; tipc_bclink_stop(); - net_stop(); + list_for_each_entry_safe(node, t_node, &tipc_node_list, list) + tipc_node_delete(node); write_unlock_bh(&tipc_net_lock); info("Left network mode\n"); } - diff --git a/net/tipc/net.h b/net/tipc/net.h index 0ba6093..9eb4b9e 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -37,8 +37,6 @@ #ifndef _TIPC_NET_H #define _TIPC_NET_H -extern atomic_t tipc_num_links; - extern rwlock_t tipc_net_lock; void tipc_net_route_msg(struct sk_buff *buf); diff --git a/net/tipc/node.c b/net/tipc/node.c index 22aeb2b..66099cb 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -47,6 +47,8 @@ static DEFINE_SPINLOCK(node_create_lock); static struct hlist_head node_htable[NODE_HTABLE_SIZE]; LIST_HEAD(tipc_node_list); static u32 tipc_num_nodes; + +static atomic_t tipc_num_links = ATOMIC_INIT(0); u32 tipc_own_tag; /** -- cgit v1.1
From e7b3acb6a85266dfd3e102b3d15b51b0ecd6bc2e Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Sun, 27 Feb 2011 14:43:52 -0500 Subject: tipc: Eliminate timestamp from link protocol messages Removes support for the timestamp field of TIPC's link protocol messages. This field was previously used to hold an OS-dependent timestamp value that was used to assist in debugging early versions of TIPC. The field has now been deemed unnecessary and has been removed from the latest TIPC specification. This change has no impact on the operation of TIPC since the field was set by TIPC, but never referenced. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/link.c | 1 - net/tipc/msg.c | 1 - net/tipc/msg.h | 10 ---------- 3 files changed, 12 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index 89fbb6d..d4f2780 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1977,7 +1977,6 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg, skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg)); return; } - msg_set_timestamp(msg, jiffies_to_msecs(jiffies)); /* Message can be sent */ diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 0787e12..b694c9a 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -238,7 +238,6 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) tipc_printf(buf, "REROUTED(%u):", msg_reroute_cnt(msg)); break; case LINK_PROTOCOL: - tipc_printf(buf, "PROT:TIM(%u):", msg_timestamp(msg)); switch (msg_type(msg)) { case STATE_MSG: tipc_printf(buf, "STATE:"); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 9d643a1..6948d3d 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -438,16 +438,6 @@ static inline void msg_set_nametype(struct tipc_msg *m, u32 n) msg_set_word(m, 8, n); } -static inline void msg_set_timestamp(struct tipc_msg *m, u32 n) -{ - msg_set_word(m, 8, n); -} - -static inline u32 msg_timestamp(struct tipc_msg *m) -{ - return msg_word(m, 8); -} 
- static inline u32 msg_nameinst(struct tipc_msg *m) { return msg_word(m, 9); -- cgit v1.1 From 8f19afb2dbc885befef2a4e7931dfcb51702a212 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Mon, 28 Feb 2011 11:36:21 -0400 Subject: tipc: cosmetic - function names are not to be full sentences Function names like "tipc_node_has_redundant_links" are unwieldy and result in long lines even for simple lines. The "has" doesn't contribute any value add, so dropping that is a slight step in the right direction. This is a cosmetic change, basic result of: for i in `grep -l tipc_node_has_ *` ; do sed -i s/tipc_node_has_/tipc_node_/ $i ; done Signed-off-by: Paul Gortmaker --- net/tipc/link.c | 6 +++--- net/tipc/name_distr.c | 2 +- net/tipc/node.c | 6 +++--- net/tipc/node.h | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index d4f2780..d8a4b90 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -548,7 +548,7 @@ void tipc_link_reset(struct link *l_ptr) tipc_node_link_down(l_ptr->owner, l_ptr); tipc_bearer_remove_dest(l_ptr->b_ptr, l_ptr->addr); - if (was_active_link && tipc_node_has_active_links(l_ptr->owner) && + if (was_active_link && tipc_node_active_links(l_ptr->owner) && l_ptr->owner->permit_changeover) { l_ptr->reset_checkpoint = checkpoint; l_ptr->exp_msg_count = START_CHANGEOVER; @@ -1954,7 +1954,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg, msg_set_max_pkt(msg, l_ptr->max_pkt_target); } - if (tipc_node_has_redundant_links(l_ptr->owner)) + if (tipc_node_redundant_links(l_ptr->owner)) msg_set_redundant_link(msg); else msg_clear_redundant_link(msg); @@ -2064,7 +2064,7 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf) l_ptr->peer_bearer_id = msg_bearer_id(msg); /* Synchronize broadcast sequence numbers */ - if (!tipc_node_has_redundant_links(l_ptr->owner)) + if (!tipc_node_redundant_links(l_ptr->owner)) l_ptr->owner->bclink.last_in =
mod(msg_last_bcast(msg)); break; case STATE_MSG: diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 1b70d5d..c9fa6df 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -111,7 +111,7 @@ static void named_cluster_distribute(struct sk_buff *buf) struct tipc_node *n_ptr; list_for_each_entry(n_ptr, &tipc_node_list, list) { - if (tipc_node_has_active_links(n_ptr)) { + if (tipc_node_active_links(n_ptr)) { buf_copy = skb_copy(buf, GFP_ATOMIC); if (!buf_copy) break; diff --git a/net/tipc/node.c b/net/tipc/node.c index 66099cb..ca09b33 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -223,19 +223,19 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr) node_lost_contact(n_ptr); } -int tipc_node_has_active_links(struct tipc_node *n_ptr) +int tipc_node_active_links(struct tipc_node *n_ptr) { return n_ptr->active_links[0] != NULL; } -int tipc_node_has_redundant_links(struct tipc_node *n_ptr) +int tipc_node_redundant_links(struct tipc_node *n_ptr) { return n_ptr->working_links > 1; } int tipc_node_is_up(struct tipc_node *n_ptr) { - return tipc_node_has_active_links(n_ptr); + return tipc_node_active_links(n_ptr); } struct tipc_node *tipc_node_attach_link(struct link *l_ptr) diff --git a/net/tipc/node.h b/net/tipc/node.h index 02e4927..dde3165 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -115,8 +115,8 @@ struct tipc_node *tipc_node_attach_link(struct link *l_ptr); void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr); void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr); void tipc_node_link_up(struct tipc_node *n_ptr, struct link *l_ptr); -int tipc_node_has_active_links(struct tipc_node *n_ptr); -int tipc_node_has_redundant_links(struct tipc_node *n_ptr); +int tipc_node_active_links(struct tipc_node *n_ptr); +int tipc_node_redundant_links(struct tipc_node *n_ptr); int tipc_node_is_up(struct tipc_node *n_ptr); struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int 
req_tlv_space); struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space); -- cgit v1.1 From 77f167fcce4d2ea144d92891d1e0fc0c50554082 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Mon, 28 Feb 2011 15:30:20 -0500 Subject: tipc: make msg_set_redundant_link() consistent with other set ops All the other boolean like msg_set_X(m) operations don't export both a msg_set_X(a) and a msg_clear_X(m), but instead just have the single msg_set_X(m, val) variant. Make the redundant_link one consistent by having the set take a value, and delete the msg_clear_redundant_link() anomaly. This is a cosmetic change and should not change behaviour. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/link.c | 5 +---- net/tipc/msg.h | 9 ++------- 2 files changed, 3 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index d8a4b90..6ffae0e 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1954,10 +1954,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg, msg_set_max_pkt(msg, l_ptr->max_pkt_target); } - if (tipc_node_redundant_links(l_ptr->owner)) - msg_set_redundant_link(msg); - else - msg_clear_redundant_link(msg); + msg_set_redundant_link(msg, tipc_node_redundant_links(l_ptr->owner)); msg_set_linkprio(msg, l_ptr->priority); /* Ensure sequence number will not fit : */ diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 6948d3d..bea0126 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -719,14 +719,9 @@ static inline u32 msg_redundant_link(struct tipc_msg *m) return msg_bits(m, 5, 12, 0x1); } -static inline void msg_set_redundant_link(struct tipc_msg *m) +static inline void msg_set_redundant_link(struct tipc_msg *m, u32 r) { - msg_set_bits(m, 5, 12, 0x1, 1); -} - -static inline void msg_clear_redundant_link(struct tipc_msg *m) -{ - msg_set_bits(m, 5, 12, 0x1, 0); + msg_set_bits(m, 5, 12, 0x1, r); } -- cgit v1.1 From 75f0aa49908992dbeb75710b72cbedb5cff9680f Mon
Sep 17 00:00:00 2001 From: Allan Stephens Date: Mon, 28 Feb 2011 15:30:20 -0500 Subject: tipc: Fix redundant link field handling in link protocol message Ensures that the "redundant link exists" field of the LINK_PROTOCOL messages sent by a link endpoint is set if and only if the sending node has at least one other working link to the peer node. Previously, the bit was set only if there were at least 2 working links to the peer node, meaning the bit was incorrectly left unset in messages sent by a non-working link endpoint when exactly one alternate working link was available. The revised code now takes the state of the link sending the message into account when deciding if an alternate link exists. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/link.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index 6ffae0e..e5f96d5 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1898,6 +1898,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg, struct sk_buff *buf = NULL; struct tipc_msg *msg = l_ptr->pmsg; u32 msg_size = sizeof(l_ptr->proto_msg); + int r_flag; if (link_blocked(l_ptr)) return; @@ -1954,7 +1955,8 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg, msg_set_max_pkt(msg, l_ptr->max_pkt_target); } - msg_set_redundant_link(msg, tipc_node_redundant_links(l_ptr->owner)); + r_flag = (l_ptr->owner->working_links > tipc_link_is_up(l_ptr)); + msg_set_redundant_link(msg, r_flag); msg_set_linkprio(msg, l_ptr->priority); /* Ensure sequence number will not fit : */ -- cgit v1.1 From a728750e4f0c9500741406299f1817022d411d33 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Mon, 28 Feb 2011 10:03:05 -0500 Subject: tipc: Cosmetic changes to neighbor discovery logic Reworks the appearance of the routine that processes incoming LINK_CONFIG messages to keep the main logic flow at a consistent level of indentation, 
and to add comments outlining the various phases involved in processing each message. This rework is being done to allow upcoming enhancements to this routine to be integrated more cleanly. The diff isn't really readable, so know that it was a case of the old code being like: tipc_disc_recv_msg(..) { if (in_own_cluster(orig)) { ... lines and lines of stuff ... } } which is now replaced with the more sane: tipc_disc_recv_msg(..) { if (!in_own_cluster(orig)) return; ... lines and lines of stuff ... } Instances of spin locking within the reindented block were replaced with the identical tipc_node_[un]lock() abstractions. Note that all these changes are cosmetic in nature, and do not change the way LINK_CONFIG messages are processed. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/discover.c | 103 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 59 insertions(+), 44 deletions(-) (limited to 'net') diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 09ce231..2345268 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -119,17 +119,21 @@ static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr, void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr) { + struct tipc_node *n_ptr; struct link *link; - struct tipc_media_addr media_addr; + struct tipc_media_addr media_addr, *addr; + struct sk_buff *rbuf; struct tipc_msg *msg = buf_msg(buf); u32 dest = msg_dest_domain(msg); u32 orig = msg_prevnode(msg); u32 net_id = msg_bc_netid(msg); u32 type = msg_type(msg); + int link_fully_up; msg_get_media_addr(msg, &media_addr); buf_discard(buf); + /* Validate discovery message from requesting node */ if (net_id != tipc_net_id) return; if (!tipc_addr_domain_valid(dest)) @@ -143,56 +147,67 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr) } if (!tipc_in_scope(dest, tipc_own_addr)) return; - if (in_own_cluster(orig)) { - /* Always accept link here */ - struct sk_buff *rbuf; - 
struct tipc_media_addr *addr; - struct tipc_node *n_ptr = tipc_node_find(orig); - int link_fully_up; - - if (n_ptr == NULL) { - n_ptr = tipc_node_create(orig); - if (!n_ptr) - return; - } - spin_lock_bh(&n_ptr->lock); - - /* Don't talk to neighbor during cleanup after last session */ + if (!in_own_cluster(orig)) + return; - if (n_ptr->cleanup_required) { - spin_unlock_bh(&n_ptr->lock); + /* Locate structure corresponding to requesting node */ + n_ptr = tipc_node_find(orig); + if (!n_ptr) { + n_ptr = tipc_node_create(orig); + if (!n_ptr) return; - } + } + tipc_node_lock(n_ptr); + + /* Don't talk to neighbor during cleanup after last session */ + if (n_ptr->cleanup_required) { + tipc_node_unlock(n_ptr); + return; + } - link = n_ptr->links[b_ptr->identity]; + link = n_ptr->links[b_ptr->identity]; + + /* Create a link endpoint for this bearer, if necessary */ + if (!link) { + link = tipc_link_create(b_ptr, orig, &media_addr); if (!link) { - link = tipc_link_create(b_ptr, orig, &media_addr); - if (!link) { - spin_unlock_bh(&n_ptr->lock); - return; - } - } - addr = &link->media_addr; - if (memcmp(addr, &media_addr, sizeof(*addr))) { - if (tipc_link_is_up(link) || (!link->started)) { - disc_dupl_alert(b_ptr, orig, &media_addr); - spin_unlock_bh(&n_ptr->lock); - return; - } - warn("Resetting link <%s>, peer interface address changed\n", - link->name); - memcpy(addr, &media_addr, sizeof(*addr)); - tipc_link_reset(link); + tipc_node_unlock(n_ptr); + return; } - link_fully_up = link_working_working(link); - spin_unlock_bh(&n_ptr->lock); - if ((type == DSC_RESP_MSG) || link_fully_up) + } + + /* + * Ensure requesting node's media address is correct + * + * If media address doesn't match and the link is working, reject the + * request (must be from a duplicate node). + * + * If media address doesn't match and the link is not working, accept + * the new media address and reset the link to ensure it starts up + * cleanly. 
+ */ + addr = &link->media_addr; + if (memcmp(addr, &media_addr, sizeof(*addr))) { + if (tipc_link_is_up(link) || (!link->started)) { + disc_dupl_alert(b_ptr, orig, &media_addr); + tipc_node_unlock(n_ptr); return; - rbuf = tipc_disc_init_msg(DSC_RESP_MSG, orig, b_ptr); - if (rbuf != NULL) { - b_ptr->media->send_msg(rbuf, b_ptr, &media_addr); - buf_discard(rbuf); } + warn("Resetting link <%s>, peer interface address changed\n", + link->name); + memcpy(addr, &media_addr, sizeof(*addr)); + tipc_link_reset(link); + } + + /* Accept discovery message & send response, if necessary */ + link_fully_up = link_working_working(link); + tipc_node_unlock(n_ptr); + if ((type == DSC_RESP_MSG) || link_fully_up) + return; + rbuf = tipc_disc_init_msg(DSC_RESP_MSG, orig, b_ptr); + if (rbuf != NULL) { + b_ptr->media->send_msg(rbuf, b_ptr, &media_addr); + buf_discard(rbuf); } } -- cgit v1.1 From fa2bae2d5bede252445cc457737d00f9036c41c3 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Mon, 28 Feb 2011 10:56:23 -0500 Subject: tipc: Give Tx of discovery responses priority over link messages Delay releasing the node lock when processing a neighbor discovery message until after the optional discovery response message has been sent. This helps ensure that any link protocol messages sent by a link endpoint created as a result of a neighbor discovery request are received after the discovery response is received, thereby giving the receiving node a chance to create a peer link endpoint to consume those link protocol messages, if one does not already exist. 
Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/discover.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 2345268..580b50a 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -201,14 +201,16 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr) /* Accept discovery message & send response, if necessary */ link_fully_up = link_working_working(link); - tipc_node_unlock(n_ptr); - if ((type == DSC_RESP_MSG) || link_fully_up) - return; - rbuf = tipc_disc_init_msg(DSC_RESP_MSG, orig, b_ptr); - if (rbuf != NULL) { - b_ptr->media->send_msg(rbuf, b_ptr, &media_addr); - buf_discard(rbuf); + + if ((type == DSC_REQ_MSG) && !link_fully_up) { + rbuf = tipc_disc_init_msg(DSC_RESP_MSG, orig, b_ptr); + if (rbuf) { + b_ptr->media->send_msg(rbuf, b_ptr, &media_addr); + buf_discard(rbuf); + } } + + tipc_node_unlock(n_ptr); } /** -- cgit v1.1 From 37b9c08a88f9a82456bb11fa050cccb544e8dc60 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Mon, 28 Feb 2011 11:32:27 -0500 Subject: tipc: Optimizations to link creation code Enhances link creation code as follows: 1) Detects illegal attempts to add a requested link earlier in the link creation process. This prevents TIPC from wasting time initializing a link object it then throws away, and also eliminates the code needed to do the throwing away. 2) Passes in the node object associated with the requested link. This allows TIPC to eliminate a search to locate the node object, as well as code that attempted to create the node if it doesn't exist. 
Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/discover.c | 2 +- net/tipc/link.c | 27 ++++++++++++++++++++------- net/tipc/link.h | 3 ++- net/tipc/node.c | 30 ++++-------------------------- net/tipc/node.h | 2 +- 5 files changed, 28 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 580b50a..caac5c9 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -169,7 +169,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr) /* Create a link endpoint for this bearer, if necessary */ if (!link) { - link = tipc_link_create(b_ptr, orig, &media_addr); + link = tipc_link_create(n_ptr, b_ptr, &media_addr); if (!link) { tipc_node_unlock(n_ptr); return; diff --git a/net/tipc/link.c b/net/tipc/link.c index e5f96d5..b73adeb 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -293,19 +293,35 @@ static void link_set_timer(struct link *l_ptr, u32 time) /** * tipc_link_create - create a new link + * @n_ptr: pointer to associated node * @b_ptr: pointer to associated bearer - * @peer: network address of node at other end of link * @media_addr: media address to use when sending messages over link * * Returns pointer to link. 
*/ -struct link *tipc_link_create(struct tipc_bearer *b_ptr, const u32 peer, +struct link *tipc_link_create(struct tipc_node *n_ptr, + struct tipc_bearer *b_ptr, const struct tipc_media_addr *media_addr) { struct link *l_ptr; struct tipc_msg *msg; char *if_name; + char addr_string[16]; + u32 peer = n_ptr->addr; + + if (n_ptr->link_cnt >= 2) { + tipc_addr_string_fill(addr_string, n_ptr->addr); + err("Attempt to establish third link to %s\n", addr_string); + return NULL; + } + + if (n_ptr->links[b_ptr->identity]) { + tipc_addr_string_fill(addr_string, n_ptr->addr); + err("Attempt to establish second link on <%s> to %s\n", + b_ptr->name, addr_string); + return NULL; + } l_ptr = kzalloc(sizeof(*l_ptr), GFP_ATOMIC); if (!l_ptr) { @@ -322,6 +338,7 @@ struct link *tipc_link_create(struct tipc_bearer *b_ptr, const u32 peer, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer)); /* note: peer i/f is appended to link name by reset/activate */ memcpy(&l_ptr->media_addr, media_addr, sizeof(*media_addr)); + l_ptr->owner = n_ptr; l_ptr->checkpoint = 1; l_ptr->b_ptr = b_ptr; link_set_supervision_props(l_ptr, b_ptr->media->tolerance); @@ -345,11 +362,7 @@ struct link *tipc_link_create(struct tipc_bearer *b_ptr, const u32 peer, link_reset_statistics(l_ptr); - l_ptr->owner = tipc_node_attach_link(l_ptr); - if (!l_ptr->owner) { - kfree(l_ptr); - return NULL; - } + tipc_node_attach_link(n_ptr, l_ptr); k_init_timer(&l_ptr->timer, (Handler)link_timeout, (unsigned long)l_ptr); list_add_tail(&l_ptr->link_list, &b_ptr->links); diff --git a/net/tipc/link.h b/net/tipc/link.h index a7794e7..e6a30db 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -207,7 +207,8 @@ struct link { struct tipc_port; -struct link *tipc_link_create(struct tipc_bearer *b_ptr, const u32 peer, +struct link *tipc_link_create(struct tipc_node *n_ptr, + struct tipc_bearer *b_ptr, const struct tipc_media_addr *media_addr); void tipc_link_delete(struct link *l_ptr); void tipc_link_changeover(struct link *l_ptr); diff 
--git a/net/tipc/node.c b/net/tipc/node.c index ca09b33..2d106ef 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -238,33 +238,11 @@ int tipc_node_is_up(struct tipc_node *n_ptr) return tipc_node_active_links(n_ptr); } -struct tipc_node *tipc_node_attach_link(struct link *l_ptr) +void tipc_node_attach_link(struct tipc_node *n_ptr, struct link *l_ptr) { - struct tipc_node *n_ptr = tipc_node_find(l_ptr->addr); - - if (!n_ptr) - n_ptr = tipc_node_create(l_ptr->addr); - if (n_ptr) { - u32 bearer_id = l_ptr->b_ptr->identity; - char addr_string[16]; - - if (n_ptr->link_cnt >= 2) { - err("Attempt to create third link to %s\n", - tipc_addr_string_fill(addr_string, n_ptr->addr)); - return NULL; - } - - if (!n_ptr->links[bearer_id]) { - n_ptr->links[bearer_id] = l_ptr; - atomic_inc(&tipc_num_links); - n_ptr->link_cnt++; - return n_ptr; - } - err("Attempt to establish second link on <%s> to %s\n", - l_ptr->b_ptr->name, - tipc_addr_string_fill(addr_string, l_ptr->addr)); - } - return NULL; + n_ptr->links[l_ptr->b_ptr->identity] = l_ptr; + atomic_inc(&tipc_num_links); + n_ptr->link_cnt++; } void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr) diff --git a/net/tipc/node.h b/net/tipc/node.h index dde3165..5c61afc 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -111,7 +111,7 @@ extern u32 tipc_own_tag; struct tipc_node *tipc_node_find(u32 addr); struct tipc_node *tipc_node_create(u32 addr); void tipc_node_delete(struct tipc_node *n_ptr); -struct tipc_node *tipc_node_attach_link(struct link *l_ptr); +void tipc_node_attach_link(struct tipc_node *n_ptr, struct link *l_ptr); void tipc_node_detach_link(struct tipc_node *n_ptr, struct link *l_ptr); void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr); void tipc_node_link_up(struct tipc_node *n_ptr, struct link *l_ptr); -- cgit v1.1 From 50d3e6399a61fca53c5c440a79f71299db66b803 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Mon, 28 Feb 2011 14:56:15 -0500 Subject: tipc: Correct misnamed 
references to neighbor discovery domain Renames items that are improperly labelled as "network scope" items (which are represented by simple integer values) rather than "network domain" items (which are represented by <Z.C.N>-type network addresses). This change is purely cosmetic, and does not affect the operation of TIPC. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/bearer.c | 14 +++++++------- net/tipc/bearer.h | 2 +- net/tipc/config.c | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index f2839b0..9815797 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -474,7 +474,7 @@ int tipc_bearer_congested(struct tipc_bearer *b_ptr, struct link *l_ptr) * tipc_enable_bearer - enable bearer with the given name */ -int tipc_enable_bearer(const char *name, u32 bcast_scope, u32 priority) +int tipc_enable_bearer(const char *name, u32 disc_domain, u32 priority) { struct tipc_bearer *b_ptr; struct media *m_ptr; @@ -494,9 +494,9 @@ int tipc_enable_bearer(const char *name, u32 bcast_scope, u32 priority) warn("Bearer <%s> rejected, illegal name\n", name); return -EINVAL; } - if (!tipc_addr_domain_valid(bcast_scope) || - !tipc_in_scope(bcast_scope, tipc_own_addr)) { - warn("Bearer <%s> rejected, illegal broadcast scope\n", name); + if (!tipc_addr_domain_valid(disc_domain) || + !tipc_in_scope(disc_domain, tipc_own_addr)) { + warn("Bearer <%s> rejected, illegal discovery domain\n", name); return -EINVAL; } if ((priority < TIPC_MIN_LINK_PRI || @@ -560,18 +560,18 @@ restart: b_ptr->media = m_ptr; b_ptr->net_plane = bearer_id + 'A'; b_ptr->active = 1; - b_ptr->detect_scope = bcast_scope; + b_ptr->detect_scope = disc_domain; b_ptr->priority = priority; INIT_LIST_HEAD(&b_ptr->cong_links); INIT_LIST_HEAD(&b_ptr->links); if (m_ptr->bcast) { b_ptr->link_req = tipc_disc_init_link_req(b_ptr, &m_ptr->bcast_addr, - bcast_scope); + disc_domain); } spin_lock_init(&b_ptr->lock); 
write_unlock_bh(&tipc_net_lock); info("Enabled bearer <%s>, discovery domain %s, priority %u\n", - name, tipc_addr_string_fill(addr_string, bcast_scope), priority); + name, tipc_addr_string_fill(addr_string, disc_domain), priority); return 0; failed: write_unlock_bh(&tipc_net_lock); diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 255dea6..adebdaf 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -167,7 +167,7 @@ void tipc_recv_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr); int tipc_block_bearer(const char *name); void tipc_continue(struct tipc_bearer *tb_ptr); -int tipc_enable_bearer(const char *bearer_name, u32 bcast_scope, u32 priority); +int tipc_enable_bearer(const char *bearer_name, u32 disc_domain, u32 priority); int tipc_disable_bearer(const char *name); /* diff --git a/net/tipc/config.c b/net/tipc/config.c index fa3d508..b25a396 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -148,7 +148,7 @@ static struct sk_buff *cfg_enable_bearer(void) args = (struct tipc_bearer_config *)TLV_DATA(req_tlv_area); if (tipc_enable_bearer(args->name, - ntohl(args->detect_scope), + ntohl(args->disc_domain), ntohl(args->priority))) return tipc_cfg_reply_error_string("unable to enable bearer"); -- cgit v1.1 From a2b58de2e3993a23b092ae54a35c38bf0dacb618 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Mon, 28 Feb 2011 15:03:22 -0500 Subject: tipc: Remove unused field in bearer structure Eliminates a field in TIPC's bearer objects that is set, but never referenced. 
Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/bearer.c | 1 - net/tipc/bearer.h | 2 -- 2 files changed, 3 deletions(-) (limited to 'net') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 9815797..c9b5268 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -560,7 +560,6 @@ restart: b_ptr->media = m_ptr; b_ptr->net_plane = bearer_id + 'A'; b_ptr->active = 1; - b_ptr->detect_scope = disc_domain; b_ptr->priority = priority; INIT_LIST_HEAD(&b_ptr->cong_links); INIT_LIST_HEAD(&b_ptr->links); diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index adebdaf..305b378 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -105,7 +105,6 @@ struct media { * @name: bearer name (format = media:interface) * @media: ptr to media structure associated with bearer * @priority: default link priority for bearer - * @detect_scope: network address mask used during automatic link creation * @identity: array index of this bearer within TIPC bearer array * @link_req: ptr to (optional) structure making periodic link setup requests * @links: list of non-congested links associated with bearer @@ -128,7 +127,6 @@ struct tipc_bearer { spinlock_t lock; struct media *media; u32 priority; - u32 detect_scope; u32 identity; struct link_req *link_req; struct list_head links; -- cgit v1.1 From d901a42b271dbd94983b798955403dcf1afa60ac Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Mon, 28 Feb 2011 16:02:30 -0500 Subject: tipc: Eliminate unnecessary constant for neighbor discovery msg size Eliminates an unnecessary constant that defines the size of a LINK_CONFIG message, and uses one of the existing standard message size symbols in its place. (The defunct constant was located in the wrong place anyway, since it was grouped with other constants that define message users instead of message sizes.) 
Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/discover.c | 4 ++-- net/tipc/msg.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/tipc/discover.c b/net/tipc/discover.c index caac5c9..2c14d6d 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -75,12 +75,12 @@ static struct sk_buff *tipc_disc_init_msg(u32 type, u32 dest_domain, struct tipc_bearer *b_ptr) { - struct sk_buff *buf = tipc_buf_acquire(DSC_H_SIZE); + struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE); struct tipc_msg *msg; if (buf) { msg = buf_msg(buf); - tipc_msg_init(msg, LINK_CONFIG, type, DSC_H_SIZE, dest_domain); + tipc_msg_init(msg, LINK_CONFIG, type, INT_H_SIZE, dest_domain); msg_set_non_seq(msg, 1); msg_set_dest_domain(msg, dest_domain); msg_set_bc_netid(msg, tipc_net_id); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index bea0126..5006366 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -525,7 +525,6 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) #define NAME_DISTRIBUTOR 11 #define MSG_FRAGMENTER 12 #define LINK_CONFIG 13 -#define DSC_H_SIZE 40 /* * Connection management protocol messages -- cgit v1.1 From f9107ebe7d18a04f07d2a990a912efa2a2ac1873 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Mon, 28 Feb 2011 11:04:08 -0500 Subject: tipc: Don't respond to neighbor discovery request on blocked bearer Adds a check to prevent TIPC from trying to respond to an incoming LINK_CONFIG request message if the associated bearer is currently prohibited from sending messages. 
Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/discover.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 2c14d6d..491eff5 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -202,7 +202,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr) /* Accept discovery message & send response, if necessary */ link_fully_up = link_working_working(link); - if ((type == DSC_REQ_MSG) && !link_fully_up) { + if ((type == DSC_REQ_MSG) && !link_fully_up && !b_ptr->blocked) { rbuf = tipc_disc_init_msg(DSC_RESP_MSG, orig, b_ptr); if (rbuf) { b_ptr->media->send_msg(rbuf, b_ptr, &media_addr); -- cgit v1.1 From 50d492321a2d94aa2ff5e26e73af08d937f8acb0 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Mon, 28 Feb 2011 11:47:36 -0500 Subject: tipc: Remove bearer flag indicating existence of broadcast address Eliminates the flag in the TIPC bearer structure that indicates if the bearer supports broadcasting, since the flag is always set to 1 and serves no useful purpose. 
Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/bearer.c | 7 ++----- net/tipc/bearer.h | 2 -- 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index c9b5268..411719f 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -158,7 +158,6 @@ int tipc_register_media(u32 media_type, m_ptr->disable_bearer = disable; m_ptr->addr2str = addr2str; memcpy(&m_ptr->bcast_addr, bcast_addr, sizeof(*bcast_addr)); - m_ptr->bcast = 1; strcpy(m_ptr->name, name); m_ptr->priority = bearer_priority; m_ptr->tolerance = link_tolerance; @@ -563,10 +562,8 @@ restart: b_ptr->priority = priority; INIT_LIST_HEAD(&b_ptr->cong_links); INIT_LIST_HEAD(&b_ptr->links); - if (m_ptr->bcast) { - b_ptr->link_req = tipc_disc_init_link_req(b_ptr, &m_ptr->bcast_addr, - disc_domain); - } + b_ptr->link_req = tipc_disc_init_link_req(b_ptr, &m_ptr->bcast_addr, + disc_domain); spin_lock_init(&b_ptr->lock); write_unlock_bh(&tipc_net_lock); info("Enabled bearer <%s>, discovery domain %s, priority %u\n", diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 305b378..31d6172 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -70,7 +70,6 @@ struct tipc_bearer; * @disable_bearer: routine which disables a bearer * @addr2str: routine which converts bearer's address to string form * @bcast_addr: media address used in broadcasting - * @bcast: non-zero if media supports broadcasting [currently mandatory] * @priority: default link (and bearer) priority * @tolerance: default time (in ms) before declaring link failure * @window: default window (in packets) before declaring link congestion @@ -87,7 +86,6 @@ struct media { char *(*addr2str)(struct tipc_media_addr *a, char *str_buf, int str_size); struct tipc_media_addr bcast_addr; - int bcast; u32 priority; u32 tolerance; u32 window; -- cgit v1.1 From 7945c1fb02ef08316df8c054ce180bf3f4e35ae4 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Fri, 11 Mar 2011 13:09:28 
-0500 Subject: tipc: Eliminate remaining support for routing table messages Gets rid of all remaining code relating to ROUTE_DISTRIBUTOR messages. These messages were only used in multi-cluster and multi-zone networks, which TIPC no longer supports. (For safety, TIPC now treats such messages the same way that it handles other unrecognized messages.) Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/link.c | 8 ++++---- net/tipc/msg.c | 27 --------------------------- net/tipc/msg.h | 15 --------------- 3 files changed, 4 insertions(+), 46 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index b73adeb..a572f0a 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1746,10 +1746,6 @@ deliver: tipc_node_unlock(n_ptr); tipc_link_recv_bundle(buf); continue; - case ROUTE_DISTRIBUTOR: - tipc_node_unlock(n_ptr); - buf_discard(buf); - continue; case NAME_DISTRIBUTOR: tipc_node_unlock(n_ptr); tipc_named_recv(buf); @@ -1776,6 +1772,10 @@ deliver: goto protocol_check; } break; + default: + buf_discard(buf); + buf = NULL; + break; } } tipc_node_unlock(n_ptr); diff --git a/net/tipc/msg.c b/net/tipc/msg.c index b694c9a..0562b38 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -274,33 +274,6 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg)); } break; - case ROUTE_DISTRIBUTOR: - tipc_printf(buf, "ROUTING_MNG:"); - switch (msg_type(msg)) { - case EXT_ROUTING_TABLE: - tipc_printf(buf, "EXT_TBL:"); - tipc_printf(buf, "TO:%x:", msg_remote_node(msg)); - break; - case LOCAL_ROUTING_TABLE: - tipc_printf(buf, "LOCAL_TBL:"); - tipc_printf(buf, "TO:%x:", msg_remote_node(msg)); - break; - case SLAVE_ROUTING_TABLE: - tipc_printf(buf, "DP_TBL:"); - tipc_printf(buf, "TO:%x:", msg_remote_node(msg)); - break; - case ROUTE_ADDITION: - tipc_printf(buf, "ADD:"); - tipc_printf(buf, "TO:%x:", msg_remote_node(msg)); - break; - case ROUTE_REMOVAL: - tipc_printf(buf, 
"REMOVE:"); - tipc_printf(buf, "TO:%x:", msg_remote_node(msg)); - break; - default: - tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg)); - } - break; case LINK_CONFIG: tipc_printf(buf, "CFG:"); switch (msg_type(msg)) { diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 5006366..7b4f4d7 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -769,21 +769,6 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n) } /* - * Routing table message data - */ - - -static inline u32 msg_remote_node(struct tipc_msg *m) -{ - return msg_word(m, msg_hdr_sz(m)/4); -} - -static inline void msg_set_remote_node(struct tipc_msg *m, u32 a) -{ - msg_set_word(m, msg_hdr_sz(m)/4, a); -} - -/* * Segmentation message types */ -- cgit v1.1 From 390bce4237487975c2168aa5fa786f75ead66852 Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Fri, 11 Mar 2011 13:22:53 -0500 Subject: tipc: Eliminate obsolete routine for handling routed messages Eliminates a routine that is used in handling messages arriving from another cluster or zone. Such messages can no longer be received by TIPC now that multi-cluster and multi-zone network support has been eliminated. 
Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker --- net/tipc/link.c | 3 --- net/tipc/msg.c | 6 ------ net/tipc/msg.h | 7 ------- 3 files changed, 16 deletions(-) (limited to 'net') diff --git a/net/tipc/link.c b/net/tipc/link.c index a572f0a..43639ff 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2423,9 +2423,6 @@ static int link_send_long_buf(struct link *l_ptr, struct sk_buff *buf) else destaddr = msg_destnode(inmsg); - if (msg_routed(inmsg)) - msg_set_prevnode(inmsg, tipc_own_addr); - /* Prepare reusable fragment header: */ tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 0562b38..6d92d17 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -192,8 +192,6 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) default: tipc_printf(buf, "UNKNOWN TYPE %u", msg_type(msg)); } - if (msg_routed(msg) && !msg_non_seq(msg)) - tipc_printf(buf, "ROUT:"); if (msg_reroute_cnt(msg)) tipc_printf(buf, "REROUTED(%u):", msg_reroute_cnt(msg)); @@ -210,8 +208,6 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) default: tipc_printf(buf, "UNKNOWN:%x", msg_type(msg)); } - if (msg_routed(msg)) - tipc_printf(buf, "ROUT:"); if (msg_reroute_cnt(msg)) tipc_printf(buf, "REROUTED(%u):", msg_reroute_cnt(msg)); @@ -232,8 +228,6 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) default: tipc_printf(buf, "UNKNOWN TYPE:%x", msg_type(msg)); } - if (msg_routed(msg)) - tipc_printf(buf, "ROUT:"); if (msg_reroute_cnt(msg)) tipc_printf(buf, "REROUTED(%u):", msg_reroute_cnt(msg)); break; diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 7b4f4d7..de02339 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -421,13 +421,6 @@ static inline int msg_is_dest(struct tipc_msg *m, u32 d) return msg_short(m) || (msg_destnode(m) == d); } -static inline u32 msg_routed(struct tipc_msg *m) -{ - if (likely(msg_short(m))) - return 0; - return 
(msg_destnode(m) ^ msg_orignode(m)) >> 11; -} - static inline u32 msg_nametype(struct tipc_msg *m) { return msg_word(m, 8); -- cgit v1.1 From 46af31800b6916c92fffa529dc3c357008da957d Mon Sep 17 00:00:00 2001 From: Hiroaki SHIMODA Date: Wed, 9 Mar 2011 20:09:58 +0000 Subject: ipv4: Fix PMTU update. On current net-next-2.6, when Linux receives ICMP Type: 3, Code: 4 (Destination unreachable (Fragmentation needed)), icmp_unreach -> ip_rt_frag_needed (peer->pmtu_expires is set here) -> tcp_v4_err -> do_pmtu_discovery -> ip_rt_update_pmtu (peer->pmtu_expires is already set, so check_peer_pmtu is skipped.) -> check_peer_pmtu check_peer_pmtu is skipped and MTU is not updated. To fix this, let check_peer_pmtu execute unconditionally. And some minor fixes 1) Avoid potential peer->pmtu_expires set to be zero. 2) In check_peer_pmtu, argument of time_before is reversed. 3) check_peer_pmtu expects peer->pmtu_orig is initialized as zero, but not initialized. Signed-off-by: Hiroaki SHIMODA Signed-off-by: David S. 
Miller --- net/ipv4/inetpeer.c | 1 + net/ipv4/route.c | 22 +++++++++++++++++----- 2 files changed, 18 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 6442c35..86b1d08 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -511,6 +511,7 @@ struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) p->rate_tokens = 0; p->rate_last = 0; p->pmtu_expires = 0; + p->pmtu_orig = 0; memset(&p->redirect_learned, 0, sizeof(p->redirect_learned)); INIT_LIST_HEAD(&p->unused); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 5655095..209989c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1533,9 +1533,15 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { + unsigned long pmtu_expires; + + pmtu_expires = jiffies + ip_rt_mtu_expires; + if (!pmtu_expires) + pmtu_expires = 1UL; + est_mtu = mtu; peer->pmtu_learned = mtu; - peer->pmtu_expires = jiffies + ip_rt_mtu_expires; + peer->pmtu_expires = pmtu_expires; } inet_putpeer(peer); @@ -1549,7 +1555,7 @@ static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer) { unsigned long expires = peer->pmtu_expires; - if (time_before(expires, jiffies)) { + if (time_before(jiffies, expires)) { u32 orig_dst_mtu = dst_mtu(dst); if (peer->pmtu_learned < orig_dst_mtu) { if (!peer->pmtu_orig) @@ -1574,14 +1580,20 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; if (!peer->pmtu_expires || mtu < peer->pmtu_learned) { + unsigned long pmtu_expires; + + pmtu_expires = jiffies + ip_rt_mtu_expires; + if (!pmtu_expires) + pmtu_expires = 1UL; + peer->pmtu_learned = mtu; - peer->pmtu_expires = jiffies + ip_rt_mtu_expires; + peer->pmtu_expires = pmtu_expires; atomic_inc(&__rt_peer_genid); rt->rt_peer_genid = rt_peer_genid(); - - check_peer_pmtu(dst, peer); } + 
check_peer_pmtu(dst, peer); + inet_putpeer(peer); } } -- cgit v1.1 From 1ce3644ade9c865c755bf0f6a4e109b7bb6eb60f Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 8 Mar 2011 00:06:31 +0000 Subject: xfrm: Use separate low and high order bits of the sequence numbers in xfrm_skb_cb To support IPsec extended sequence numbers, we split the output sequence numbers of xfrm_skb_cb in low and high order 32 bits and we add the high order 32 bits to the input sequence numbers. All users are updated accordingly. Signed-off-by: Steffen Klassert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/ah4.c | 2 +- net/ipv4/esp4.c | 4 ++-- net/ipv6/ah6.c | 2 +- net/ipv6/esp6.c | 4 ++-- net/xfrm/xfrm_input.c | 4 ++-- net/xfrm/xfrm_output.c | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 325053d..4286fd3 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -208,7 +208,7 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) ah->reserved = 0; ah->spi = x->id.spi; - ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); + ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, 0, skb->len); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index e42a905..882dbbb 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -215,7 +215,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) } esph->spi = x->id.spi; - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, @@ -227,7 +227,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) aead_givcrypt_set_crypt(req, sg, sg, clen, iv); aead_givcrypt_set_assoc(req, asg, sizeof(*esph)); aead_givcrypt_set_giv(req, esph->enc_data, - XFRM_SKB_CB(skb)->seq.output); + XFRM_SKB_CB(skb)->seq.output.low); ESP_SKB_CB(skb)->tmp = tmp; err = crypto_aead_givencrypt(req); diff --git 
a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 1aba54a..2195ae6 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -409,7 +409,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) ah->reserved = 0; ah->spi = x->id.spi; - ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); + ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, 0, skb->len); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 1b5c982..c7b5d5e 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -204,7 +204,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) *skb_mac_header(skb) = IPPROTO_ESP; esph->spi = x->id.spi; - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output); + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, @@ -216,7 +216,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) aead_givcrypt_set_crypt(req, sg, sg, clen, iv); aead_givcrypt_set_assoc(req, asg, sizeof(*esph)); aead_givcrypt_set_giv(req, esph->enc_data, - XFRM_SKB_CB(skb)->seq.output); + XFRM_SKB_CB(skb)->seq.output.low); ESP_SKB_CB(skb)->tmp = tmp; err = crypto_aead_givencrypt(req); diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 45f1c98..b173b7f 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -118,7 +118,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) if (encap_type < 0) { async = 1; x = xfrm_input_state(skb); - seq = XFRM_SKB_CB(skb)->seq.input; + seq = XFRM_SKB_CB(skb)->seq.input.low; goto resume; } @@ -184,7 +184,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) spin_unlock(&x->lock); - XFRM_SKB_CB(skb)->seq.input = seq; + XFRM_SKB_CB(skb)->seq.input.low = seq; nexthdr = x->type->input(x, skb); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 64f2ae1..4b63776 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -68,7 +68,7 @@ static int 
xfrm_output_one(struct sk_buff *skb, int err) } if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { - XFRM_SKB_CB(skb)->seq.output = ++x->replay.oseq; + XFRM_SKB_CB(skb)->seq.output.low = ++x->replay.oseq; if (unlikely(x->replay.oseq == 0)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR); x->replay.oseq--; -- cgit v1.1 From 0dc49e9b28a7253ff05be2794d747f8ea5f1f423 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 8 Mar 2011 00:07:14 +0000 Subject: esp4: Add support for IPsec extended sequence numbers This patch adds IPsec extended sequence numbers support to esp4. We use the authencesn crypto algorithm to handle esp with separate encryption/authentication algorithms. Signed-off-by: Steffen Klassert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/esp4.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 82 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 882dbbb..03f994b 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -33,11 +33,14 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu); * * TODO: Use spare space in skb for this where possible. */ -static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags) +static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen) { unsigned int len; - len = crypto_aead_ivsize(aead); + len = seqhilen; + + len += crypto_aead_ivsize(aead); + if (len) { len += crypto_aead_alignmask(aead) & ~(crypto_tfm_ctx_alignment() - 1); @@ -52,10 +55,15 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags) return kmalloc(len, GFP_ATOMIC); } -static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp) +static inline __be32 *esp_tmp_seqhi(void *tmp) +{ + return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32)); +} +static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen) { return crypto_aead_ivsize(aead) ? 
- PTR_ALIGN((u8 *)tmp, crypto_aead_alignmask(aead) + 1) : tmp; + PTR_ALIGN((u8 *)tmp + seqhilen, + crypto_aead_alignmask(aead) + 1) : tmp + seqhilen; } static inline struct aead_givcrypt_request *esp_tmp_givreq( @@ -122,6 +130,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) int plen; int tfclen; int nfrags; + int assoclen; + int sglists; + int seqhilen; + __be32 *seqhi; /* skb is pure payload to encrypt */ @@ -151,14 +163,25 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) goto error; nfrags = err; - tmp = esp_alloc_tmp(aead, nfrags + 1); + assoclen = sizeof(*esph); + sglists = 1; + seqhilen = 0; + + if (x->props.flags & XFRM_STATE_ESN) { + sglists += 2; + seqhilen += sizeof(__be32); + assoclen += seqhilen; + } + + tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); if (!tmp) goto error; - iv = esp_tmp_iv(aead, tmp); + seqhi = esp_tmp_seqhi(tmp); + iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_givreq(aead, iv); asg = esp_givreq_sg(aead, req); - sg = asg + 1; + sg = asg + sglists; /* Fill padding... 
*/ tail = skb_tail_pointer(trailer); @@ -221,11 +244,19 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) skb_to_sgvec(skb, sg, esph->enc_data + crypto_aead_ivsize(aead) - skb->data, clen + alen); - sg_init_one(asg, esph, sizeof(*esph)); + + if ((x->props.flags & XFRM_STATE_ESN)) { + sg_init_table(asg, 3); + sg_set_buf(asg, &esph->spi, sizeof(__be32)); + *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi); + sg_set_buf(asg + 1, seqhi, seqhilen); + sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32)); + } else + sg_init_one(asg, esph, sizeof(*esph)); aead_givcrypt_set_callback(req, 0, esp_output_done, skb); aead_givcrypt_set_crypt(req, sg, sg, clen, iv); - aead_givcrypt_set_assoc(req, asg, sizeof(*esph)); + aead_givcrypt_set_assoc(req, asg, assoclen); aead_givcrypt_set_giv(req, esph->enc_data, XFRM_SKB_CB(skb)->seq.output.low); @@ -346,6 +377,10 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) struct sk_buff *trailer; int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead); int nfrags; + int assoclen; + int sglists; + int seqhilen; + __be32 *seqhi; void *tmp; u8 *iv; struct scatterlist *sg; @@ -362,16 +397,27 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) goto out; nfrags = err; + assoclen = sizeof(*esph); + sglists = 1; + seqhilen = 0; + + if (x->props.flags & XFRM_STATE_ESN) { + sglists += 2; + seqhilen += sizeof(__be32); + assoclen += seqhilen; + } + err = -ENOMEM; - tmp = esp_alloc_tmp(aead, nfrags + 1); + tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); if (!tmp) goto out; ESP_SKB_CB(skb)->tmp = tmp; - iv = esp_tmp_iv(aead, tmp); + seqhi = esp_tmp_seqhi(tmp); + iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_req(aead, iv); asg = esp_req_sg(aead, req); - sg = asg + 1; + sg = asg + sglists; skb->ip_summed = CHECKSUM_NONE; @@ -382,11 +428,19 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), 
elen); - sg_init_one(asg, esph, sizeof(*esph)); + + if ((x->props.flags & XFRM_STATE_ESN)) { + sg_init_table(asg, 3); + sg_set_buf(asg, &esph->spi, sizeof(__be32)); + *seqhi = XFRM_SKB_CB(skb)->seq.input.hi; + sg_set_buf(asg + 1, seqhi, seqhilen); + sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32)); + } else + sg_init_one(asg, esph, sizeof(*esph)); aead_request_set_callback(req, 0, esp_input_done, skb); aead_request_set_crypt(req, sg, sg, elen, iv); - aead_request_set_assoc(req, asg, sizeof(*esph)); + aead_request_set_assoc(req, asg, assoclen); err = crypto_aead_decrypt(req); if (err == -EINPROGRESS) @@ -500,10 +554,20 @@ static int esp_init_authenc(struct xfrm_state *x) goto error; err = -ENAMETOOLONG; - if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)", - x->aalg ? x->aalg->alg_name : "digest_null", - x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) - goto error; + + if ((x->props.flags & XFRM_STATE_ESN)) { + if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, + "authencesn(%s,%s)", + x->aalg ? x->aalg->alg_name : "digest_null", + x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) + goto error; + } else { + if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, + "authenc(%s,%s)", + x->aalg ? x->aalg->alg_name : "digest_null", + x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) + goto error; + } aead = crypto_alloc_aead(authenc_name, 0, 0); err = PTR_ERR(aead); -- cgit v1.1 From d212a4c29096484e5e83b5006e695add126260af Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 8 Mar 2011 00:07:51 +0000 Subject: esp6: Add support for IPsec extended sequence numbers This patch adds IPsec extended sequence numbers support to esp6. We use the authencesn crypto algorithm to handle esp with separate encryption/authentication algorithms. Signed-off-by: Steffen Klassert Acked-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv6/esp6.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 86 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index c7b5d5e..5aa8ec8 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -54,16 +54,20 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu); /* * Allocate an AEAD request structure with extra space for SG and IV. * - * For alignment considerations the IV is placed at the front, followed - * by the request and finally the SG list. + * For alignment considerations the upper 32 bits of the sequence number are + * placed at the front, if present. Followed by the IV, the request and finally + * the SG list. * * TODO: Use spare space in skb for this where possible. */ -static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags) +static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqihlen) { unsigned int len; - len = crypto_aead_ivsize(aead); + len = seqihlen; + + len += crypto_aead_ivsize(aead); + if (len) { len += crypto_aead_alignmask(aead) & ~(crypto_tfm_ctx_alignment() - 1); @@ -78,10 +82,16 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags) return kmalloc(len, GFP_ATOMIC); } -static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp) +static inline __be32 *esp_tmp_seqhi(void *tmp) +{ + return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32)); +} + +static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen) { return crypto_aead_ivsize(aead) ? 
- PTR_ALIGN((u8 *)tmp, crypto_aead_alignmask(aead) + 1) : tmp; + PTR_ALIGN((u8 *)tmp + seqhilen, + crypto_aead_alignmask(aead) + 1) : tmp + seqhilen; } static inline struct aead_givcrypt_request *esp_tmp_givreq( @@ -145,8 +155,12 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) int plen; int tfclen; int nfrags; + int assoclen; + int sglists; + int seqhilen; u8 *iv; u8 *tail; + __be32 *seqhi; struct esp_data *esp = x->data; /* skb is pure payload to encrypt */ @@ -175,14 +189,25 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) goto error; nfrags = err; - tmp = esp_alloc_tmp(aead, nfrags + 1); + assoclen = sizeof(*esph); + sglists = 1; + seqhilen = 0; + + if (x->props.flags & XFRM_STATE_ESN) { + sglists += 2; + seqhilen += sizeof(__be32); + assoclen += seqhilen; + } + + tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); if (!tmp) goto error; - iv = esp_tmp_iv(aead, tmp); + seqhi = esp_tmp_seqhi(tmp); + iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_givreq(aead, iv); asg = esp_givreq_sg(aead, req); - sg = asg + 1; + sg = asg + sglists; /* Fill padding... 
*/ tail = skb_tail_pointer(trailer); @@ -210,11 +235,19 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) skb_to_sgvec(skb, sg, esph->enc_data + crypto_aead_ivsize(aead) - skb->data, clen + alen); - sg_init_one(asg, esph, sizeof(*esph)); + + if ((x->props.flags & XFRM_STATE_ESN)) { + sg_init_table(asg, 3); + sg_set_buf(asg, &esph->spi, sizeof(__be32)); + *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi); + sg_set_buf(asg + 1, seqhi, seqhilen); + sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32)); + } else + sg_init_one(asg, esph, sizeof(*esph)); aead_givcrypt_set_callback(req, 0, esp_output_done, skb); aead_givcrypt_set_crypt(req, sg, sg, clen, iv); - aead_givcrypt_set_assoc(req, asg, sizeof(*esph)); + aead_givcrypt_set_assoc(req, asg, assoclen); aead_givcrypt_set_giv(req, esph->enc_data, XFRM_SKB_CB(skb)->seq.output.low); @@ -292,8 +325,12 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) struct sk_buff *trailer; int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead); int nfrags; + int assoclen; + int sglists; + int seqhilen; int ret = 0; void *tmp; + __be32 *seqhi; u8 *iv; struct scatterlist *sg; struct scatterlist *asg; @@ -314,12 +351,24 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) } ret = -ENOMEM; - tmp = esp_alloc_tmp(aead, nfrags + 1); + + assoclen = sizeof(*esph); + sglists = 1; + seqhilen = 0; + + if (x->props.flags & XFRM_STATE_ESN) { + sglists += 2; + seqhilen += sizeof(__be32); + assoclen += seqhilen; + } + + tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); if (!tmp) goto out; ESP_SKB_CB(skb)->tmp = tmp; - iv = esp_tmp_iv(aead, tmp); + seqhi = esp_tmp_seqhi(tmp); + iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_req(aead, iv); asg = esp_req_sg(aead, req); sg = asg + 1; @@ -333,11 +382,19 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen); - sg_init_one(asg, esph, 
sizeof(*esph)); + + if ((x->props.flags & XFRM_STATE_ESN)) { + sg_init_table(asg, 3); + sg_set_buf(asg, &esph->spi, sizeof(__be32)); + *seqhi = XFRM_SKB_CB(skb)->seq.input.hi; + sg_set_buf(asg + 1, seqhi, seqhilen); + sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32)); + } else + sg_init_one(asg, esph, sizeof(*esph)); aead_request_set_callback(req, 0, esp_input_done, skb); aead_request_set_crypt(req, sg, sg, elen, iv); - aead_request_set_assoc(req, asg, sizeof(*esph)); + aead_request_set_assoc(req, asg, assoclen); ret = crypto_aead_decrypt(req); if (ret == -EINPROGRESS) @@ -443,10 +500,20 @@ static int esp_init_authenc(struct xfrm_state *x) goto error; err = -ENAMETOOLONG; - if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "authenc(%s,%s)", - x->aalg ? x->aalg->alg_name : "digest_null", - x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) - goto error; + + if ((x->props.flags & XFRM_STATE_ESN)) { + if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, + "authencesn(%s,%s)", + x->aalg ? x->aalg->alg_name : "digest_null", + x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) + goto error; + } else { + if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, + "authenc(%s,%s)", + x->aalg ? x->aalg->alg_name : "digest_null", + x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) + goto error; + } aead = crypto_alloc_aead(authenc_name, 0, 0); err = PTR_ERR(aead); -- cgit v1.1 From 9fdc4883d92d20842c5acea77a4a21bb1574b495 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 8 Mar 2011 00:08:32 +0000 Subject: xfrm: Move IPsec replay detection functions to a separate file To support multiple versions of replay detection, we move the replay detection functions to a separate file and make them accessible via function pointers contained in the struct xfrm_replay. Signed-off-by: Steffen Klassert Acked-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/xfrm/Makefile | 2 +- net/xfrm/xfrm_input.c | 5 +- net/xfrm/xfrm_output.c | 15 ++---- net/xfrm/xfrm_replay.c | 141 +++++++++++++++++++++++++++++++++++++++++++++++++ net/xfrm/xfrm_state.c | 111 ++------------------------------------ net/xfrm/xfrm_user.c | 4 +- 6 files changed, 154 insertions(+), 124 deletions(-) create mode 100644 net/xfrm/xfrm_replay.c (limited to 'net') diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index c631047..aa429ee 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ xfrm_input.o xfrm_output.o xfrm_algo.o \ - xfrm_sysctl.o + xfrm_sysctl.o xfrm_replay.o obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index b173b7f..55d5f5c 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -172,7 +172,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop_unlock; } - if (x->props.replay_window && xfrm_replay_check(x, skb, seq)) { + if (x->props.replay_window && x->repl->check(x, skb, seq)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); goto drop_unlock; } @@ -206,8 +206,7 @@ resume: /* only the first xfrm gets the encap type */ encap_type = 0; - if (x->props.replay_window) - xfrm_replay_advance(x, seq); + x->repl->advance(x, seq); x->curlft.bytes += skb->len; x->curlft.packets++; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 4b63776..1aba03f 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -67,17 +67,10 @@ static int xfrm_output_one(struct sk_buff *skb, int err) goto error; } - if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { - XFRM_SKB_CB(skb)->seq.output.low = ++x->replay.oseq; - if (unlikely(x->replay.oseq == 0)) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR); - x->replay.oseq--; - xfrm_audit_state_replay_overflow(x, 
skb); - err = -EOVERFLOW; - goto error; - } - if (xfrm_aevent_is_on(net)) - xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); + err = x->repl->overflow(x, skb); + if (err) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR); + goto error; } x->curlft.bytes += skb->len; diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c new file mode 100644 index 0000000..42d68f3 --- /dev/null +++ b/net/xfrm/xfrm_replay.c @@ -0,0 +1,141 @@ +/* + * xfrm_replay.c - xfrm replay detection, derived from xfrm_state.c. + */ + +#include + +static void xfrm_replay_notify(struct xfrm_state *x, int event) +{ + struct km_event c; + /* we send notify messages in case + * 1. we updated on of the sequence numbers, and the seqno difference + * is at least x->replay_maxdiff, in this case we also update the + * timeout of our timer function + * 2. if x->replay_maxage has elapsed since last update, + * and there were changes + * + * The state structure must be locked! + */ + + switch (event) { + case XFRM_REPLAY_UPDATE: + if (x->replay_maxdiff && + (x->replay.seq - x->preplay.seq < x->replay_maxdiff) && + (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) { + if (x->xflags & XFRM_TIME_DEFER) + event = XFRM_REPLAY_TIMEOUT; + else + return; + } + + break; + + case XFRM_REPLAY_TIMEOUT: + if (memcmp(&x->replay, &x->preplay, + sizeof(struct xfrm_replay_state)) == 0) { + x->xflags |= XFRM_TIME_DEFER; + return; + } + + break; + } + + memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state)); + c.event = XFRM_MSG_NEWAE; + c.data.aevent = event; + km_state_notify(x, &c); + + if (x->replay_maxage && + !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) + x->xflags &= ~XFRM_TIME_DEFER; +} + +static int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = 0; + struct net *net = xs_net(x); + + if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { + XFRM_SKB_CB(skb)->seq.output.low = ++x->replay.oseq; + if (unlikely(x->replay.oseq == 0)) { + x->replay.oseq--; + 
xfrm_audit_state_replay_overflow(x, skb); + err = -EOVERFLOW; + + return err; + } + if (xfrm_aevent_is_on(net)) + x->repl->notify(x, XFRM_REPLAY_UPDATE); + } + + return err; +} + +static int xfrm_replay_check(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + u32 diff; + u32 seq = ntohl(net_seq); + + if (unlikely(seq == 0)) + goto err; + + if (likely(seq > x->replay.seq)) + return 0; + + diff = x->replay.seq - seq; + if (diff >= min_t(unsigned int, x->props.replay_window, + sizeof(x->replay.bitmap) * 8)) { + x->stats.replay_window++; + goto err; + } + + if (x->replay.bitmap & (1U << diff)) { + x->stats.replay++; + goto err; + } + return 0; + +err: + xfrm_audit_state_replay(x, skb, net_seq); + return -EINVAL; +} + +static void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) +{ + u32 diff; + u32 seq = ntohl(net_seq); + + if (!x->props.replay_window) + return; + + if (seq > x->replay.seq) { + diff = seq - x->replay.seq; + if (diff < x->props.replay_window) + x->replay.bitmap = ((x->replay.bitmap) << diff) | 1; + else + x->replay.bitmap = 1; + x->replay.seq = seq; + } else { + diff = x->replay.seq - seq; + x->replay.bitmap |= (1U << diff); + } + + if (xfrm_aevent_is_on(xs_net(x))) + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); +} + +static struct xfrm_replay xfrm_replay_legacy = { + .advance = xfrm_replay_advance, + .check = xfrm_replay_check, + .notify = xfrm_replay_notify, + .overflow = xfrm_replay_overflow, +}; + +int xfrm_init_replay(struct xfrm_state *x) +{ + x->repl = &xfrm_replay_legacy; + + return 0; +} +EXPORT_SYMBOL(xfrm_init_replay); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index cd6be49..23779d1 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -42,13 +42,6 @@ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo); -#ifdef CONFIG_AUDITSYSCALL 
-static void xfrm_audit_state_replay(struct xfrm_state *x, - struct sk_buff *skb, __be32 net_seq); -#else -#define xfrm_audit_state_replay(x, s, sq) do { ; } while (0) -#endif /* CONFIG_AUDITSYSCALL */ - static inline unsigned int xfrm_dst_hash(struct net *net, const xfrm_address_t *daddr, const xfrm_address_t *saddr, @@ -1619,54 +1612,6 @@ void xfrm_state_walk_done(struct xfrm_state_walk *walk) } EXPORT_SYMBOL(xfrm_state_walk_done); - -void xfrm_replay_notify(struct xfrm_state *x, int event) -{ - struct km_event c; - /* we send notify messages in case - * 1. we updated on of the sequence numbers, and the seqno difference - * is at least x->replay_maxdiff, in this case we also update the - * timeout of our timer function - * 2. if x->replay_maxage has elapsed since last update, - * and there were changes - * - * The state structure must be locked! - */ - - switch (event) { - case XFRM_REPLAY_UPDATE: - if (x->replay_maxdiff && - (x->replay.seq - x->preplay.seq < x->replay_maxdiff) && - (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) { - if (x->xflags & XFRM_TIME_DEFER) - event = XFRM_REPLAY_TIMEOUT; - else - return; - } - - break; - - case XFRM_REPLAY_TIMEOUT: - if ((x->replay.seq == x->preplay.seq) && - (x->replay.bitmap == x->preplay.bitmap) && - (x->replay.oseq == x->preplay.oseq)) { - x->xflags |= XFRM_TIME_DEFER; - return; - } - - break; - } - - memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state)); - c.event = XFRM_MSG_NEWAE; - c.data.aevent = event; - km_state_notify(x, &c); - - if (x->replay_maxage && - !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) - x->xflags &= ~XFRM_TIME_DEFER; -} - static void xfrm_replay_timer_handler(unsigned long data) { struct xfrm_state *x = (struct xfrm_state*)data; @@ -1675,7 +1620,7 @@ static void xfrm_replay_timer_handler(unsigned long data) if (x->km.state == XFRM_STATE_VALID) { if (xfrm_aevent_is_on(xs_net(x))) - xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT); + x->repl->notify(x, XFRM_REPLAY_TIMEOUT); 
else x->xflags |= XFRM_TIME_DEFER; } @@ -1683,57 +1628,6 @@ static void xfrm_replay_timer_handler(unsigned long data) spin_unlock(&x->lock); } -int xfrm_replay_check(struct xfrm_state *x, - struct sk_buff *skb, __be32 net_seq) -{ - u32 diff; - u32 seq = ntohl(net_seq); - - if (unlikely(seq == 0)) - goto err; - - if (likely(seq > x->replay.seq)) - return 0; - - diff = x->replay.seq - seq; - if (diff >= min_t(unsigned int, x->props.replay_window, - sizeof(x->replay.bitmap) * 8)) { - x->stats.replay_window++; - goto err; - } - - if (x->replay.bitmap & (1U << diff)) { - x->stats.replay++; - goto err; - } - return 0; - -err: - xfrm_audit_state_replay(x, skb, net_seq); - return -EINVAL; -} - -void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) -{ - u32 diff; - u32 seq = ntohl(net_seq); - - if (seq > x->replay.seq) { - diff = seq - x->replay.seq; - if (diff < x->props.replay_window) - x->replay.bitmap = ((x->replay.bitmap) << diff) | 1; - else - x->replay.bitmap = 1; - x->replay.seq = seq; - } else { - diff = x->replay.seq - seq; - x->replay.bitmap |= (1U << diff); - } - - if (xfrm_aevent_is_on(xs_net(x))) - xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); -} - static LIST_HEAD(xfrm_km_list); static DEFINE_RWLOCK(xfrm_km_lock); @@ -2246,7 +2140,7 @@ void xfrm_audit_state_replay_overflow(struct xfrm_state *x, } EXPORT_SYMBOL_GPL(xfrm_audit_state_replay_overflow); -static void xfrm_audit_state_replay(struct xfrm_state *x, +void xfrm_audit_state_replay(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq) { struct audit_buffer *audit_buf; @@ -2261,6 +2155,7 @@ static void xfrm_audit_state_replay(struct xfrm_state *x, spi, spi, ntohl(net_seq)); audit_log_end(audit_buf); } +EXPORT_SYMBOL_GPL(xfrm_audit_state_replay); void xfrm_audit_state_notfound_simple(struct sk_buff *skb, u16 family) { diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 468ab60..f7b3c85 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -475,8 +475,10 @@ static struct 
xfrm_state *xfrm_state_construct(struct net *net, x->preplay.seq = x->replay.seq+x->replay_maxdiff; x->preplay.oseq = x->replay.oseq +x->replay_maxdiff; - /* override default values from above */ + if ((err = xfrm_init_replay(x))) + goto error; + /* override default values from above */ xfrm_update_ae_params(x, attrs); return x; -- cgit v1.1 From 97e15c3a8504ea39a209778d7dcdbdf440404a91 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 8 Mar 2011 00:09:09 +0000 Subject: xfrm: Support anti-replay window size bigger than 32 packets As it is, the anti-replay bitmap in struct xfrm_replay_state can only accommodate 32 packets, even though it is possible to configure anti-replay window sizes up to 255 packets from userspace. So we reject any packet with a sequence number within the configured window but outside the bitmap. With this patch, we represent the anti-replay window as a bitmap of variable length that can be accessed via the new struct xfrm_replay_state_esn. Thus, we have no limit on the window size anymore. To use the new anti-replay window implementation, new userspace tools are required. We leave the old implementation untouched to stay in sync with old userspace tools. Signed-off-by: Steffen Klassert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_replay.c | 207 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 206 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 42d68f3..50589ea 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -1,5 +1,21 @@ /* * xfrm_replay.c - xfrm replay detection, derived from xfrm_state.c. + * + * Copyright (C) 2010 secunet Security Networks AG + * Copyright (C) 2010 Steffen Klassert + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation.
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ #include @@ -125,6 +141,178 @@ static void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } +static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = 0; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct net *net = xs_net(x); + + if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { + XFRM_SKB_CB(skb)->seq.output.low = ++replay_esn->oseq; + if (unlikely(replay_esn->oseq == 0)) { + replay_esn->oseq--; + xfrm_audit_state_replay_overflow(x, skb); + err = -EOVERFLOW; + + return err; + } + if (xfrm_aevent_is_on(net)) + x->repl->notify(x, XFRM_REPLAY_UPDATE); + } + + return err; +} + +static int xfrm_replay_check_bmp(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + unsigned int bitnr, nr; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + u32 seq = ntohl(net_seq); + u32 diff = replay_esn->seq - seq; + u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window; + + if (unlikely(seq == 0)) + goto err; + + if (likely(seq > replay_esn->seq)) + return 0; + + if (diff >= replay_esn->replay_window) { + x->stats.replay_window++; + goto err; + } + + if (pos >= diff) { + bitnr = (pos - diff) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + if (replay_esn->bmp[nr] & (1U << bitnr)) + goto err_replay; + } else { + bitnr = replay_esn->replay_window - (diff - pos); + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + if (replay_esn->bmp[nr] & (1U << bitnr)) + goto err_replay; + } + 
return 0; + +err_replay: + x->stats.replay++; +err: + xfrm_audit_state_replay(x, skb, net_seq); + return -EINVAL; +} + +static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq) +{ + unsigned int bitnr, nr, i; + u32 diff; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + u32 seq = ntohl(net_seq); + u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window; + + if (!replay_esn->replay_window) + return; + + if (seq > replay_esn->seq) { + diff = seq - replay_esn->seq; + + if (diff < replay_esn->replay_window) { + for (i = 1; i < diff; i++) { + bitnr = (pos + i) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] &= ~(1U << bitnr); + } + + bitnr = (pos + diff) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + } else { + nr = replay_esn->replay_window >> 5; + for (i = 0; i <= nr; i++) + replay_esn->bmp[i] = 0; + + bitnr = (pos + diff) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + } + + replay_esn->seq = seq; + } else { + diff = replay_esn->seq - seq; + + if (pos >= diff) { + bitnr = (pos - diff) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + } else { + bitnr = replay_esn->replay_window - (diff - pos); + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + } + } + + if (xfrm_aevent_is_on(xs_net(x))) + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); +} + +static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event) +{ + struct km_event c; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct xfrm_replay_state_esn *preplay_esn = x->preplay_esn; + + /* we send notify messages in case + * 1. we updated on of the sequence numbers, and the seqno difference + * is at least x->replay_maxdiff, in this case we also update the + * timeout of our timer function + * 2. 
if x->replay_maxage has elapsed since last update, + * and there were changes + * + * The state structure must be locked! + */ + + switch (event) { + case XFRM_REPLAY_UPDATE: + if (x->replay_maxdiff && + (replay_esn->seq - preplay_esn->seq < x->replay_maxdiff) && + (replay_esn->oseq - preplay_esn->oseq < x->replay_maxdiff)) { + if (x->xflags & XFRM_TIME_DEFER) + event = XFRM_REPLAY_TIMEOUT; + else + return; + } + + break; + + case XFRM_REPLAY_TIMEOUT: + if (memcmp(x->replay_esn, x->preplay_esn, + xfrm_replay_state_esn_len(replay_esn)) == 0) { + x->xflags |= XFRM_TIME_DEFER; + return; + } + + break; + } + + memcpy(x->preplay_esn, x->replay_esn, + xfrm_replay_state_esn_len(replay_esn)); + c.event = XFRM_MSG_NEWAE; + c.data.aevent = event; + km_state_notify(x, &c); + + if (x->replay_maxage && + !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) + x->xflags &= ~XFRM_TIME_DEFER; +} + static struct xfrm_replay xfrm_replay_legacy = { .advance = xfrm_replay_advance, .check = xfrm_replay_check, @@ -132,9 +320,26 @@ static struct xfrm_replay xfrm_replay_legacy = { .overflow = xfrm_replay_overflow, }; +static struct xfrm_replay xfrm_replay_bmp = { + .advance = xfrm_replay_advance_bmp, + .check = xfrm_replay_check_bmp, + .notify = xfrm_replay_notify_bmp, + .overflow = xfrm_replay_overflow_bmp, +}; + int xfrm_init_replay(struct xfrm_state *x) { - x->repl = &xfrm_replay_legacy; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + + if (replay_esn) { + if (replay_esn->replay_window > + replay_esn->bmp_len * sizeof(__u32)) + return -EINVAL; + + x->repl = &xfrm_replay_bmp; + } else + x->repl = &xfrm_replay_legacy; + return 0; } -- cgit v1.1 From 2cd084678fc1eb75aec4f7ae3d339d232c00ec61 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 8 Mar 2011 00:09:51 +0000 Subject: xfrm: Add support for IPsec extended sequence numbers This patch adds support for IPsec extended sequence numbers (esn) as defined in RFC 4303. 
The bits to manage the anti-replay window are based on a patch from Alex Badea. Signed-off-by: Steffen Klassert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_input.c | 4 ++ net/xfrm/xfrm_replay.c | 190 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 193 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 55d5f5c..872065c 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -107,6 +107,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) struct net *net = dev_net(skb->dev); int err; __be32 seq; + __be32 seq_hi; struct xfrm_state *x; xfrm_address_t *daddr; struct xfrm_mode *inner_mode; @@ -184,7 +185,10 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) spin_unlock(&x->lock); + seq_hi = htonl(xfrm_replay_seqhi(x, seq)); + XFRM_SKB_CB(skb)->seq.input.low = seq; + XFRM_SKB_CB(skb)->seq.input.hi = seq_hi; nexthdr = x->type->input(x, skb); diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 50589ea..2f5be5b 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -20,6 +20,31 @@ #include +u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq) +{ + u32 seq, seq_hi, bottom; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + + if (!(x->props.flags & XFRM_STATE_ESN)) + return 0; + + seq = ntohl(net_seq); + seq_hi = replay_esn->seq_hi; + bottom = replay_esn->seq - replay_esn->replay_window + 1; + + if (likely(replay_esn->seq >= replay_esn->replay_window - 1)) { + /* A. same subspace */ + if (unlikely(seq < bottom)) + seq_hi++; + } else { + /* B. 
window spans two subspaces */ + if (unlikely(seq >= bottom)) + seq_hi--; + } + + return seq_hi; +} + static void xfrm_replay_notify(struct xfrm_state *x, int event) { struct km_event c; @@ -313,6 +338,160 @@ static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event) x->xflags &= ~XFRM_TIME_DEFER; } +static int xfrm_replay_overflow_esn(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = 0; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct net *net = xs_net(x); + + if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { + XFRM_SKB_CB(skb)->seq.output.low = ++replay_esn->oseq; + XFRM_SKB_CB(skb)->seq.output.hi = replay_esn->oseq_hi; + + if (unlikely(replay_esn->oseq == 0)) { + XFRM_SKB_CB(skb)->seq.output.hi = ++replay_esn->oseq_hi; + + if (replay_esn->oseq_hi == 0) { + replay_esn->oseq--; + replay_esn->oseq_hi--; + xfrm_audit_state_replay_overflow(x, skb); + err = -EOVERFLOW; + + return err; + } + } + if (xfrm_aevent_is_on(net)) + x->repl->notify(x, XFRM_REPLAY_UPDATE); + } + + return err; +} + +static int xfrm_replay_check_esn(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + unsigned int bitnr, nr; + u32 diff; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + u32 seq = ntohl(net_seq); + u32 pos = (replay_esn->seq - 1) % replay_esn->replay_window; + u32 wsize = replay_esn->replay_window; + u32 top = replay_esn->seq; + u32 bottom = top - wsize + 1; + + if (unlikely(seq == 0 && replay_esn->seq_hi == 0 && + (replay_esn->seq < replay_esn->replay_window - 1))) + goto err; + + diff = top - seq; + + if (likely(top >= wsize - 1)) { + /* A. same subspace */ + if (likely(seq > top) || seq < bottom) + return 0; + } else { + /* B. 
window spans two subspaces */ + if (likely(seq > top && seq < bottom)) + return 0; + if (seq >= bottom) + diff = ~seq + top + 1; + } + + if (diff >= replay_esn->replay_window) { + x->stats.replay_window++; + goto err; + } + + if (pos >= diff) { + bitnr = (pos - diff) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + if (replay_esn->bmp[nr] & (1U << bitnr)) + goto err_replay; + } else { + bitnr = replay_esn->replay_window - (diff - pos); + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + if (replay_esn->bmp[nr] & (1U << bitnr)) + goto err_replay; + } + return 0; + +err_replay: + x->stats.replay++; +err: + xfrm_audit_state_replay(x, skb, net_seq); + return -EINVAL; +} + +static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) +{ + unsigned int bitnr, nr, i; + int wrap; + u32 diff, pos, seq, seq_hi; + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + + if (!replay_esn->replay_window) + return; + + seq = ntohl(net_seq); + pos = (replay_esn->seq - 1) % replay_esn->replay_window; + seq_hi = xfrm_replay_seqhi(x, net_seq); + wrap = seq_hi - replay_esn->seq_hi; + + if ((!wrap && seq > replay_esn->seq) || wrap > 0) { + if (likely(!wrap)) + diff = seq - replay_esn->seq; + else + diff = ~replay_esn->seq + seq + 1; + + if (diff < replay_esn->replay_window) { + for (i = 1; i < diff; i++) { + bitnr = (pos + i) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] &= ~(1U << bitnr); + } + + bitnr = (pos + diff) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + } else { + nr = replay_esn->replay_window >> 5; + for (i = 0; i <= nr; i++) + replay_esn->bmp[i] = 0; + + bitnr = (pos + diff) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + } + + replay_esn->seq = seq; + + if (unlikely(wrap > 0)) + replay_esn->seq_hi++; + } else { + diff = replay_esn->seq - seq; + + if (pos >= 
diff) { + bitnr = (pos - diff) % replay_esn->replay_window; + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + } else { + bitnr = replay_esn->replay_window - (diff - pos); + nr = bitnr >> 5; + bitnr = bitnr & 0x1F; + replay_esn->bmp[nr] |= (1U << bitnr); + } + } + + if (xfrm_aevent_is_on(xs_net(x))) + xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); +} + static struct xfrm_replay xfrm_replay_legacy = { .advance = xfrm_replay_advance, .check = xfrm_replay_check, @@ -327,6 +506,13 @@ static struct xfrm_replay xfrm_replay_bmp = { .overflow = xfrm_replay_overflow_bmp, }; +static struct xfrm_replay xfrm_replay_esn = { + .advance = xfrm_replay_advance_esn, + .check = xfrm_replay_check_esn, + .notify = xfrm_replay_notify_bmp, + .overflow = xfrm_replay_overflow_esn, +}; + int xfrm_init_replay(struct xfrm_state *x) { struct xfrm_replay_state_esn *replay_esn = x->replay_esn; @@ -336,11 +522,13 @@ int xfrm_init_replay(struct xfrm_state *x) replay_esn->bmp_len * sizeof(__u32)) return -EINVAL; + if ((x->props.flags & XFRM_STATE_ESN) && x->replay_esn) + x->repl = &xfrm_replay_esn; + else x->repl = &xfrm_replay_bmp; } else x->repl = &xfrm_replay_legacy; - return 0; } EXPORT_SYMBOL(xfrm_init_replay); -- cgit v1.1 From d8647b79c3b7e223ac051439d165bc8e7bbb832f Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 8 Mar 2011 00:10:27 +0000 Subject: xfrm: Add user interface for esn and big anti-replay windows This patch adds a netlink based user interface to configure esn and big anti-replay windows. The new netlink attribute XFRMA_REPLAY_ESN_VAL is used to configure the new implementation. If the XFRM_STATE_ESN flag is set, we use esn and support for big anti-replay windows for the configured state. If this flag is not set we use the new implementation with 32 bit sequence numbers. A big anti-replay window can be configured in this case anyway. Signed-off-by: Steffen Klassert Acked-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_state.c | 2 ++ net/xfrm/xfrm_user.c | 99 +++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 87 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 23779d1..d575f05 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -356,6 +356,8 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) kfree(x->calg); kfree(x->encap); kfree(x->coaddr); + kfree(x->replay_esn); + kfree(x->preplay_esn); if (x->inner_mode) xfrm_put_mode(x->inner_mode); if (x->inner_mode_iaf) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index f7b3c85..706385a 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -119,6 +119,19 @@ static inline int verify_sec_ctx_len(struct nlattr **attrs) return 0; } +static inline int verify_replay(struct xfrm_usersa_info *p, + struct nlattr **attrs) +{ + struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL]; + + if (!rt) + return 0; + + if (p->replay_window != 0) + return -EINVAL; + + return 0; +} static int verify_newsa_info(struct xfrm_usersa_info *p, struct nlattr **attrs) @@ -214,6 +227,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, goto out; if ((err = verify_sec_ctx_len(attrs))) goto out; + if ((err = verify_replay(p, attrs))) + goto out; err = -EINVAL; switch (p->mode) { @@ -345,6 +360,33 @@ static int attach_aead(struct xfrm_algo_aead **algpp, u8 *props, return 0; } +static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn, + struct xfrm_replay_state_esn **preplay_esn, + struct nlattr *rta) +{ + struct xfrm_replay_state_esn *p, *pp, *up; + + if (!rta) + return 0; + + up = nla_data(rta); + + p = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL); + if (!p) + return -ENOMEM; + + pp = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL); + if (!pp) { + kfree(p); + return -ENOMEM; + } + + *replay_esn = p; + *preplay_esn = pp; + + return 0; +} + static inline int 
xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx) { int len = 0; @@ -380,10 +422,20 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs) { struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; + struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL]; struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; struct nlattr *et = attrs[XFRMA_ETIMER_THRESH]; struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH]; + if (re) { + struct xfrm_replay_state_esn *replay_esn; + replay_esn = nla_data(re); + memcpy(x->replay_esn, replay_esn, + xfrm_replay_state_esn_len(replay_esn)); + memcpy(x->preplay_esn, replay_esn, + xfrm_replay_state_esn_len(replay_esn)); + } + if (rp) { struct xfrm_replay_state *replay; replay = nla_data(rp); @@ -467,13 +519,14 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, security_xfrm_state_alloc(x, nla_data(attrs[XFRMA_SEC_CTX]))) goto error; + if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn, + attrs[XFRMA_REPLAY_ESN_VAL]))) + goto error; + x->km.seq = p->seq; x->replay_maxdiff = net->xfrm.sysctl_aevent_rseqth; /* sysctl_xfrm_aevent_etime is in 100ms units */ x->replay_maxage = (net->xfrm.sysctl_aevent_etime*HZ)/XFRM_AE_ETH_M; - x->preplay.bitmap = 0; - x->preplay.seq = x->replay.seq+x->replay_maxdiff; - x->preplay.oseq = x->replay.oseq +x->replay_maxdiff; if ((err = xfrm_init_replay(x))) goto error; @@ -709,6 +762,10 @@ static int copy_to_user_state_extra(struct xfrm_state *x, if (xfrm_mark_put(skb, &x->mark)) goto nla_put_failure; + if (x->replay_esn) + NLA_PUT(skb, XFRMA_REPLAY_ESN_VAL, + xfrm_replay_state_esn_len(x->replay_esn), x->replay_esn); + if (x->security && copy_sec_ctx(x->security, skb) < 0) goto nla_put_failure; @@ -1578,10 +1635,14 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; } -static inline size_t xfrm_aevent_msgsize(void) +static inline size_t xfrm_aevent_msgsize(struct xfrm_state 
*x) { + size_t replay_size = x->replay_esn ? + xfrm_replay_state_esn_len(x->replay_esn) : + sizeof(struct xfrm_replay_state); + return NLMSG_ALIGN(sizeof(struct xfrm_aevent_id)) - + nla_total_size(sizeof(struct xfrm_replay_state)) + + nla_total_size(replay_size) + nla_total_size(sizeof(struct xfrm_lifetime_cur)) + nla_total_size(sizeof(struct xfrm_mark)) + nla_total_size(4) /* XFRM_AE_RTHR */ @@ -1606,7 +1667,13 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct id->reqid = x->props.reqid; id->flags = c->data.aevent; - NLA_PUT(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), &x->replay); + if (x->replay_esn) + NLA_PUT(skb, XFRMA_REPLAY_ESN_VAL, + xfrm_replay_state_esn_len(x->replay_esn), + x->replay_esn); + else + NLA_PUT(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), &x->replay); + NLA_PUT(skb, XFRMA_LTIME_VAL, sizeof(x->curlft), &x->curlft); if (id->flags & XFRM_AE_RTHR) @@ -1639,16 +1706,16 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_aevent_id *p = nlmsg_data(nlh); struct xfrm_usersa_id *id = &p->sa_id; - r_skb = nlmsg_new(xfrm_aevent_msgsize(), GFP_ATOMIC); - if (r_skb == NULL) - return -ENOMEM; - mark = xfrm_mark_get(attrs, &m); x = xfrm_state_lookup(net, mark, &id->daddr, id->spi, id->proto, id->family); - if (x == NULL) { - kfree_skb(r_skb); + if (x == NULL) return -ESRCH; + + r_skb = nlmsg_new(xfrm_aevent_msgsize(x), GFP_ATOMIC); + if (r_skb == NULL) { + xfrm_state_put(x); + return -ENOMEM; } /* @@ -1680,9 +1747,10 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_mark m; struct xfrm_aevent_id *p = nlmsg_data(nlh); struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; + struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL]; struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; - if (!lt && !rp) + if (!lt && !rp && !re) return err; /* pedantic mode - thou shalt sayeth replaceth */ @@ -2147,6 +2215,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_KMADDRESS] = { .len = 
sizeof(struct xfrm_user_kmaddress) }, [XFRMA_MARK] = { .len = sizeof(struct xfrm_mark) }, [XFRMA_TFCPAD] = { .type = NLA_U32 }, + [XFRMA_REPLAY_ESN_VAL] = { .len = sizeof(struct xfrm_replay_state_esn) }, }; static struct xfrm_link { @@ -2274,7 +2343,7 @@ static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event struct net *net = xs_net(x); struct sk_buff *skb; - skb = nlmsg_new(xfrm_aevent_msgsize(), GFP_ATOMIC); + skb = nlmsg_new(xfrm_aevent_msgsize(x), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; @@ -2328,6 +2397,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x) l += nla_total_size(sizeof(*x->encap)); if (x->tfcpad) l += nla_total_size(sizeof(x->tfcpad)); + if (x->replay_esn) + l += nla_total_size(xfrm_replay_state_esn_len(x->replay_esn)); if (x->security) l += nla_total_size(sizeof(struct xfrm_user_sec_ctx) + x->security->ctx_len); -- cgit v1.1 From 4e75db2e8ff2c97762e87f61f54d7cdeaab1a6b0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 13 Mar 2011 23:22:23 -0700 Subject: inetpeer: should use call_rcu() variant After commit 7b46ac4e77f3224a (inetpeer: Don't disable BH for initial fast RCU lookup.), we should use call_rcu() to wait proper RCU grace period. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inetpeer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 86b1d08..dd1b20e 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -399,7 +399,7 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base) write_sequnlock_bh(&base->lock); if (do_free) - call_rcu_bh(&p->rcu, inetpeer_free_rcu); + call_rcu(&p->rcu, inetpeer_free_rcu); else /* The node is used again. Decrease the reference counter * back. 
The loop "cleanup -> unlink_from_unused -- cgit v1.1 From 1fa073803ec543e8b95fc5acf164fa2e0074bb4f Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Mon, 14 Mar 2011 12:03:44 -0400 Subject: tipc: delete extra semicolon blocking node deletion Remove bogus semicolon only recently introduced in 34e46258cb9f5 that blocks cleanup of nodes for N>1 on shutdown. Signed-off-by: Paul Gortmaker --- net/tipc/net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/tipc/net.c b/net/tipc/net.c index 8fbc7e6..68b3dd6 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -221,7 +221,7 @@ void tipc_net_stop(void) tipc_bearer_stop(); tipc_mode = TIPC_NODE_MODE; tipc_bclink_stop(); - list_for_each_entry_safe(node, t_node, &tipc_node_list, list); + list_for_each_entry_safe(node, t_node, &tipc_node_list, list) tipc_node_delete(node); write_unlock_bh(&tipc_net_lock); info("Left network mode\n"); -- cgit v1.1 From 42046e2e45c109ba703993c510401a11f716c8df Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 14 Mar 2011 19:11:44 +0100 Subject: netfilter: x_tables: return -ENOENT for non-existant matches/targets As Stephen correctly points out, we need to return -ENOENT in xt_find_match()/xt_find_target() after the patch "netfilter: x_tables: misuse of try_then_request_module" in order to properly indicate a non-existent module to the caller.
Signed-off-by: Patrick McHardy --- net/netfilter/x_tables.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 271eed3..a9adf4c 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -190,7 +190,7 @@ EXPORT_SYMBOL(xt_unregister_matches); struct xt_match *xt_find_match(u8 af, const char *name, u8 revision) { struct xt_match *m; - int err = 0; + int err = -ENOENT; if (mutex_lock_interruptible(&xt[af].mutex) != 0) return ERR_PTR(-EINTR); @@ -235,7 +235,7 @@ EXPORT_SYMBOL_GPL(xt_request_find_match); struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) { struct xt_target *t; - int err = 0; + int err = -ENOENT; if (mutex_lock_interruptible(&xt[af].mutex) != 0) return ERR_PTR(-EINTR); -- cgit v1.1 From fe8f661f2c2bb058822f13f6f232e121bde1338f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 14 Mar 2011 19:20:44 +0100 Subject: netfilter: nf_conntrack: fix sysctl memory leak Message in log because sysctl table was not empty at netns exit WARNING: at net/sysctl_net.c:84 sysctl_net_exit+0x2a/0x2c() Instrumenting showed that the nf_conntrack_timestamp was the entry that was being created but not cleared. 
Signed-off-by: Stephen Hemminger Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 1909311..1181236 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1294,6 +1294,7 @@ static void nf_conntrack_cleanup_net(struct net *net) nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); nf_conntrack_ecache_fini(net); + nf_conntrack_tstamp_fini(net); nf_conntrack_acct_fini(net); nf_conntrack_expect_fini(net); kmem_cache_destroy(net->ct.nf_conntrack_cachep); -- cgit v1.1 From cf28d7934c57168d530b606c26ab955a56eb13f9 Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Wed, 9 Mar 2011 10:02:38 +0100 Subject: mac80211: Shortcut minstrel_ht rate setup for non-MRR capable devices Devices without multi rate retry support won't be able to use all rates as specified by minstrel_ht. Hence, we can simply skip setting up further rates as the devices will only use the first one. Also add a special case for devices with only two possible tx rates. We use sample_rate -> max_prob_rate for sampling and max_tp_rate -> max_prob_rate by default. Signed-off-by: Helmut Schaa Signed-off-by: John W.
Linville --- net/mac80211/rc80211_minstrel_ht.c | 41 +++++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index bce14fb..8212a8b 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -598,19 +598,46 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, sample = true; minstrel_ht_set_rate(mp, mi, &ar[0], sample_idx, txrc, true, false); - minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate, - txrc, false, false); info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; } else { minstrel_ht_set_rate(mp, mi, &ar[0], mi->max_tp_rate, txrc, false, false); - minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate2, - txrc, false, true); } - minstrel_ht_set_rate(mp, mi, &ar[2], mi->max_prob_rate, txrc, false, !sample); - ar[3].count = 0; - ar[3].idx = -1; + if (mp->hw->max_rates >= 3) { + /* + * At least 3 tx rates supported, use + * sample_rate -> max_tp_rate -> max_prob_rate for sampling and + * max_tp_rate -> max_tp_rate2 -> max_prob_rate by default. + */ + if (sample_idx >= 0) + minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate, + txrc, false, false); + else + minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate2, + txrc, false, true); + + minstrel_ht_set_rate(mp, mi, &ar[2], mi->max_prob_rate, + txrc, false, !sample); + + ar[3].count = 0; + ar[3].idx = -1; + } else if (mp->hw->max_rates == 2) { + /* + * Only 2 tx rates supported, use + * sample_rate -> max_prob_rate for sampling and + * max_tp_rate -> max_prob_rate by default. 
+ */ + minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_prob_rate, + txrc, false, !sample); + + ar[2].count = 0; + ar[2].idx = -1; + } else { + /* Not using MRR, only use the first rate */ + ar[1].count = 0; + ar[1].idx = -1; + } mi->total_packets++; -- cgit v1.1 From 9db372fdd5de9e0464c77a9d3db2a3b356db8668 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 11 Mar 2011 21:45:51 +0100 Subject: mac80211: fix channel type recalculation with HT and non-HT interfaces When running an AP interface along with the cooked monitor interface created by hostapd, adding an interface and deleting it again triggers a channel type recalculation during which the (non-HT) monitor interface takes precedence over the HT AP interface, thus causing the channel type to be set to non-HT. Fix this by ensuring that a more wide channel type will not be overwritten by a less wide channel type. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- net/mac80211/chan.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 5b24740..889c3e9 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -77,6 +77,9 @@ bool ieee80211_set_channel_type(struct ieee80211_local *local, switch (tmp->vif.bss_conf.channel_type) { case NL80211_CHAN_NO_HT: case NL80211_CHAN_HT20: + if (superchan > tmp->vif.bss_conf.channel_type) + break; + superchan = tmp->vif.bss_conf.channel_type; break; case NL80211_CHAN_HT40PLUS: -- cgit v1.1 From 05aebe2e5d009314a1d9b47ad9cda59ccb57d76d Mon Sep 17 00:00:00 2001 From: Daniel Turull Date: Mon, 14 Mar 2011 13:47:40 -0700 Subject: pktgen: bug fix in transmission headers with frags=0 (bug introduced by commit 26ad787962ef84677a48c560 (pktgen: speedup fragmented skbs) The headers of pktgen were incorrectly added in a pktgen packet without frags (frags=0). There was an offset in the pktgen headers. 
The cause was in reusing the pgh variable as a return variable in skb_put when adding the payload to the skb. Signed-off-by: Daniel Turull Signed-off-by: David S. Miller Signed-off-by: Eric Dumazet --- net/core/pktgen.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index f0aec6c..0c55eaa 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2620,8 +2620,7 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, datalen -= sizeof(*pgh); if (pkt_dev->nfrags <= 0) { - pgh = (struct pktgen_hdr *)skb_put(skb, datalen); - memset(pgh + 1, 0, datalen); + memset(skb_put(skb, datalen), 0, datalen); } else { int frags = pkt_dev->nfrags; int i, len; -- cgit v1.1 From 1faa4356a3bd89ea11fb92752d897cff3a20ec0e Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 7 Mar 2011 08:34:06 +0000 Subject: bridge: control carrier based on ports online MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes the bridge device behave like a physical device. In earlier releases the bridge always asserted carrier. This changes the behavior so that bridge device carrier is on only if one or more ports are in the forwarding state. This should help IPv6 autoconfiguration, DHCP, and routing daemons. I did brief testing with Network and Virt manager and they seem fine, but since this changes behavior of bridge, it should wait until net-next (2.6.39). Signed-off-by: Stephen Hemminger Reviewed-by: Nicolas de Pesloüan Tested-By: Adam Majer Signed-off-by: David S. 
Miller --- net/bridge/br_device.c | 4 ++++ net/bridge/br_stp.c | 35 ++++++++++++++++++++++------------- net/bridge/br_stp_timer.c | 1 + 3 files changed, 27 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 1461b19..21e5901 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -78,6 +78,8 @@ static int br_dev_open(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); + netif_carrier_off(dev); + br_features_recompute(br); netif_start_queue(dev); br_stp_enable_bridge(br); @@ -94,6 +96,8 @@ static int br_dev_stop(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); + netif_carrier_off(dev); + br_stp_disable_bridge(br); br_multicast_stop(br); diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 57186d8..a5badd0 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -397,28 +397,37 @@ static void br_make_forwarding(struct net_bridge_port *p) void br_port_state_selection(struct net_bridge *br) { struct net_bridge_port *p; + unsigned int liveports = 0; /* Don't change port states if userspace is handling STP */ if (br->stp_enabled == BR_USER_STP) return; list_for_each_entry(p, &br->port_list, list) { - if (p->state != BR_STATE_DISABLED) { - if (p->port_no == br->root_port) { - p->config_pending = 0; - p->topology_change_ack = 0; - br_make_forwarding(p); - } else if (br_is_designated_port(p)) { - del_timer(&p->message_age_timer); - br_make_forwarding(p); - } else { - p->config_pending = 0; - p->topology_change_ack = 0; - br_make_blocking(p); - } + if (p->state == BR_STATE_DISABLED) + continue; + + if (p->port_no == br->root_port) { + p->config_pending = 0; + p->topology_change_ack = 0; + br_make_forwarding(p); + } else if (br_is_designated_port(p)) { + del_timer(&p->message_age_timer); + br_make_forwarding(p); + } else { + p->config_pending = 0; + p->topology_change_ack = 0; + br_make_blocking(p); } + if (p->state == BR_STATE_FORWARDING) + 
++liveports; } + + if (liveports == 0) + netif_carrier_off(br->dev); + else + netif_carrier_on(br->dev); } /* called under bridge lock */ diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index 7b22456..3e96514 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -94,6 +94,7 @@ static void br_forward_delay_timer_expired(unsigned long arg) p->state = BR_STATE_FORWARDING; if (br_is_designated_for_some_port(br)) br_topology_change_detection(br); + netif_carrier_on(br->dev); } br_log_state(p); spin_unlock(&br->lock); -- cgit v1.1 From a461c0297f2e80c78eaa03fc5141bf57a814ff4f Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 10 Mar 2011 05:57:04 +0000 Subject: bridge: skip forwarding delay if not using STP If Spanning Tree Protocol is not enabled, there is no good reason for the bridge code to wait for the forwarding delay period before enabling the link. The purpose of the forwarding delay is to allow STP to learn about other bridges before nominating itself. The only possible impact is that when starting up a new port the bridge may flood a packet now, where previously it might have seen traffic from the other host and preseeded the forwarding table. Includes change for local variable br already available in that func. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/bridge/br_stp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 57186d8..47582d3 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -375,12 +375,12 @@ static void br_make_forwarding(struct net_bridge_port *p) if (p->state != BR_STATE_BLOCKING) return; - if (br->forward_delay == 0) { + if (br->stp_enabled == BR_NO_STP || br->forward_delay == 0) { p->state = BR_STATE_FORWARDING; br_topology_change_detection(br); del_timer(&p->forward_delay_timer); } - else if (p->br->stp_enabled == BR_KERNEL_STP) + else if (br->stp_enabled == BR_KERNEL_STP) p->state = BR_STATE_LISTENING; else p->state = BR_STATE_LEARNING; -- cgit v1.1 From e5537bfc98f01561fbdfbd8a78f0dc3e2360491d Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Mon, 14 Mar 2011 15:25:33 -0700 Subject: af_unix: update locking comment We latch our state using a spinlock not a r/w kind of lock. Signed-off-by: Daniel Baluta Signed-off-by: David S. Miller --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 437a99e..b213ce6 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1124,7 +1124,7 @@ restart: /* Latch our state. - It is tricky place. We need to grab write lock and cannot + It is tricky place. We need to grab our state lock and cannot drop lock on peer. It is dangerous because deadlock is possible. Connect to self case and simultaneous attempt to connect are eliminated by checking socket -- cgit v1.1 From febf081987ec445f071ed10b73e9707a88cc5cc4 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 14 Mar 2011 07:52:12 +0000 Subject: tcp: fix RTT for quick packets in congestion control In the congestion control interface, the callback for each ACK includes an estimated round trip time in microseconds. 
Some algorithms need high resolution (Vegas style) but most only need jiffie resolution. If RTT is not accurate (like a retransmission) -1 is used as a flag value. When doing coarse resolution if RTT is less than a jiffie then 0 should be returned rather than no estimate. Otherwise algorithms that expect good ack's to trigger slow start (like CUBIC Hystart) will be confused. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 65f6c04..e16b17e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3350,7 +3350,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, net_invalid_timestamp())) rtt_us = ktime_us_delta(ktime_get_real(), last_ackt); - else if (ca_seq_rtt > 0) + else if (ca_seq_rtt >= 0) rtt_us = jiffies_to_usecs(ca_seq_rtt); } -- cgit v1.1 From c54b4b7655447c1f24f6d50779c22eba9ee0fd24 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 14 Mar 2011 07:52:13 +0000 Subject: tcp_cubic: fix comparison of jiffies Jiffies wraps around therefore the correct way to compare is to use cast to signed value. Note: cubic is not using full jiffies value on 64 bit arch because using full unsigned long makes struct bictcp grow too large for the available ca_priv area. Includes correction from Sangtae Ha to improve ack train detection. Signed-off-by: Stephen Hemminger Signed-off-by: David S.
Miller --- net/ipv4/tcp_cubic.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 71d5f2f..43bb34c 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -342,9 +342,11 @@ static void hystart_update(struct sock *sk, u32 delay) u32 curr_jiffies = jiffies; /* first detection parameter - ack-train detection */ - if (curr_jiffies - ca->last_jiffies <= msecs_to_jiffies(2)) { + if ((s32)(curr_jiffies - ca->last_jiffies) <= + msecs_to_jiffies(2)) { ca->last_jiffies = curr_jiffies; - if (curr_jiffies - ca->round_start >= ca->delay_min>>4) + if ((s32) (curr_jiffies - ca->round_start) > + ca->delay_min >> 4) ca->found |= HYSTART_ACK_TRAIN; } -- cgit v1.1 From aac46324e12a2bf2e9e0855ad6a287945e34ad39 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 14 Mar 2011 07:52:14 +0000 Subject: tcp_cubic: make ack train delta value a parameter Make the spacing between ACK's that indicates a train a tuneable value like other hystart values. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/ipv4/tcp_cubic.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 43bb34c..66d3b00 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -52,6 +52,7 @@ static int tcp_friendliness __read_mostly = 1; static int hystart __read_mostly = 1; static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY; static int hystart_low_window __read_mostly = 16; +static int hystart_ack_delta __read_mostly = 2; static u32 cube_rtt_scale __read_mostly; static u32 beta_scale __read_mostly; @@ -75,6 +76,8 @@ MODULE_PARM_DESC(hystart_detect, "hyrbrid slow start detection mechanisms" " 1: packet-train 2: delay 3: both packet-train and delay"); module_param(hystart_low_window, int, 0644); MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start"); +module_param(hystart_ack_delta, int, 0644); +MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (msecs)"); /* BIC TCP Parameters */ struct bictcp { @@ -343,7 +346,7 @@ static void hystart_update(struct sock *sk, u32 delay) /* first detection parameter - ack-train detection */ if ((s32)(curr_jiffies - ca->last_jiffies) <= - msecs_to_jiffies(2)) { + msecs_to_jiffies(hystart_ack_delta)) { ca->last_jiffies = curr_jiffies; if ((s32) (curr_jiffies - ca->round_start) > ca->delay_min >> 4) -- cgit v1.1 From 17a6e9f1aa9ba07ca13a1eaf1e631e743af50cca Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 14 Mar 2011 07:52:15 +0000 Subject: tcp_cubic: fix clock dependency The hystart code was written with assumption that HZ=1000. Replace the use of jiffies with bictcp_clock as a millisecond real time clock. Signed-off-by: Stephen Hemminger Reported-by: Lucas Nussbaum Signed-off-by: David S. 
Miller --- net/ipv4/tcp_cubic.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 66d3b00..f4fb2d4 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -88,7 +88,7 @@ struct bictcp { u32 last_time; /* time when updated last_cwnd */ u32 bic_origin_point;/* origin point of bic function */ u32 bic_K; /* time to origin point from the beginning of the current epoch */ - u32 delay_min; /* min delay */ + u32 delay_min; /* min delay (msec << 3) */ u32 epoch_start; /* beginning of an epoch */ u32 ack_cnt; /* number of acks */ u32 tcp_cwnd; /* estimated tcp cwnd */ @@ -98,7 +98,7 @@ struct bictcp { u8 found; /* the exit point is found? */ u32 round_start; /* beginning of each round */ u32 end_seq; /* end_seq of the round */ - u32 last_jiffies; /* last time when the ACK spacing is close */ + u32 last_ack; /* last time when the ACK spacing is close */ u32 curr_rtt; /* the minimum rtt of current round */ }; @@ -119,12 +119,21 @@ static inline void bictcp_reset(struct bictcp *ca) ca->found = 0; } +static inline u32 bictcp_clock(void) +{ +#if HZ < 1000 + return ktime_to_ms(ktime_get_real()); +#else + return jiffies_to_msecs(jiffies); +#endif +} + static inline void bictcp_hystart_reset(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); - ca->round_start = ca->last_jiffies = jiffies; + ca->round_start = ca->last_ack = bictcp_clock(); ca->end_seq = tp->snd_nxt; ca->curr_rtt = 0; ca->sample_cnt = 0; @@ -239,8 +248,8 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) */ /* change the unit from HZ to bictcp_HZ */ - t = ((tcp_time_stamp + (ca->delay_min>>3) - ca->epoch_start) - << BICTCP_HZ) / HZ; + t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3) + - ca->epoch_start) << BICTCP_HZ) / HZ; if (t < ca->bic_K) /* t - K */ offs = ca->bic_K - t; @@ -342,14 +351,12 @@ static void hystart_update(struct 
sock *sk, u32 delay) struct bictcp *ca = inet_csk_ca(sk); if (!(ca->found & hystart_detect)) { - u32 curr_jiffies = jiffies; + u32 now = bictcp_clock(); /* first detection parameter - ack-train detection */ - if ((s32)(curr_jiffies - ca->last_jiffies) <= - msecs_to_jiffies(hystart_ack_delta)) { - ca->last_jiffies = curr_jiffies; - if ((s32) (curr_jiffies - ca->round_start) > - ca->delay_min >> 4) + if ((s32)(now - ca->last_ack) <= hystart_ack_delta) { + ca->last_ack = now; + if ((s32)(now - ca->round_start) > ca->delay_min >> 4) ca->found |= HYSTART_ACK_TRAIN; } @@ -396,7 +403,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ) return; - delay = usecs_to_jiffies(rtt_us) << 3; + delay = (rtt_us << 3) / USEC_PER_MSEC; if (delay == 0) delay = 1; -- cgit v1.1 From 3b585b34493ec9db382d6c325d4ed77b9eb2d2a5 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 14 Mar 2011 07:52:16 +0000 Subject: tcp_cubic: enable high resolution ack time if needed This is a refined version of an earlier patch by Lucas Nussbaum. Cubic needs RTT values in milliseconds. If HZ < 1000 then the values will be too coarse. Signed-off-by: Stephen Hemminger Reported-by: Lucas Nussbaum Signed-off-by: David S. 
Miller --- net/ipv4/tcp_cubic.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index f4fb2d4..5e0491d 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -459,6 +459,10 @@ static int __init cubictcp_register(void) /* divide by bic_scale and by constant Srtt (100ms) */ do_div(cube_factor, bic_scale * 10); + /* hystart needs ms clock resolution */ + if (hystart && HZ < 1000) + cubictcp.flags |= TCP_CONG_RTT_STAMP; + return tcp_register_congestion_control(&cubictcp); } -- cgit v1.1 From 2b4636a5f8ca547000f6aba24ec1c58f31f4a91d Mon Sep 17 00:00:00 2001 From: Sangtae Ha Date: Mon, 14 Mar 2011 07:52:17 +0000 Subject: tcp_cubic: make the delay threshold of HyStart less sensitive Make HyStart less sensitive to abrupt delay variations due to buffer bloat. Signed-off-by: Sangtae Ha Acked-by: Stephen Hemminger Reported-by: Lucas Nussbaum Signed-off-by: David S. Miller --- net/ipv4/tcp_cubic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 5e0491d..7172c12 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -39,7 +39,7 @@ /* Number of delay samples for detecting the increase of delay */ #define HYSTART_MIN_SAMPLES 8 -#define HYSTART_DELAY_MIN (2U<<3) +#define HYSTART_DELAY_MIN (4U<<3) #define HYSTART_DELAY_MAX (16U<<3) #define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX) -- cgit v1.1 From b5ccd07337489fa9c9d32e0b628a2168b7953adf Mon Sep 17 00:00:00 2001 From: Sangtae Ha Date: Mon, 14 Mar 2011 07:52:18 +0000 Subject: tcp_cubic: fix low utilization of CUBIC with HyStart HyStart sets the initial exit point of slow start. Suppose that HyStart exits at 0.5BDP in a BDP network and no history exists. If the BDP of a network is large, CUBIC's initial cwnd growth may be too conservative to utilize the link. CUBIC increases the cwnd 20% per RTT in this case. 
Signed-off-by: Sangtae Ha Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/tcp_cubic.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 7172c12..90d92dd 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -270,6 +270,13 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) ca->cnt = 100 * cwnd; /* very small increment*/ } + /* + * The initial growth of cubic function may be too conservative + * when the available bandwidth is still unknown. + */ + if (ca->loss_cwnd == 0 && ca->cnt > 20) + ca->cnt = 20; /* increase cwnd 5% per RTT */ + /* TCP Friendly */ if (tcp_friendliness) { u32 scale = beta_scale; -- cgit v1.1 From 698e1d23cfc15312be2e7665014afd98c49ae9a1 Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Mon, 14 Mar 2011 09:01:02 +0000 Subject: net: dcbnl: Update copyright dates Signed-off-by: Mark Rustad Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- net/dcb/dcbnl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 118392f..3609eac 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, Intel Corporation. + * Copyright (c) 2008-2011, Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, -- cgit v1.1 From 097fc76a0805bdca17baf12cad9d3bcb215716a9 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 4 Mar 2011 12:26:17 +0200 Subject: ipvs: avoid lookup for fwmark 0 Restore the previous behaviour to lookup for fwmark service only when fwmark is non-null. This saves only CPU. 
Signed-off-by: Julian Anastasov Signed-off-by: Hans Schillstrom Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ctl.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c73b0c8..f0369d6 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -411,9 +411,11 @@ ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, /* * Check the table hashed by fwmark first */ - svc = __ip_vs_svc_fwm_find(net, af, fwmark); - if (fwmark && svc) - goto out; + if (fwmark) { + svc = __ip_vs_svc_fwm_find(net, af, fwmark); + if (svc) + goto out; + } /* * Check the table hashed by -- cgit v1.1 From 4a569c0c0f833adace1e3aadaa38780ec2fcdf9e Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 4 Mar 2011 12:28:20 +0200 Subject: ipvs: remove _bh from percpu stats reading ip_vs_read_cpu_stats is called only from timer, so no need for _bh locks. Signed-off-by: Julian Anastasov Signed-off-by: Hans Schillstrom Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_est.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index f560a05..88bd716 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -69,10 +69,10 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, sum->inpkts += s->ustats.inpkts; sum->outpkts += s->ustats.outpkts; do { - start = u64_stats_fetch_begin_bh(&s->syncp); + start = u64_stats_fetch_begin(&s->syncp); inbytes = s->ustats.inbytes; outbytes = s->ustats.outbytes; - } while (u64_stats_fetch_retry_bh(&s->syncp, start)); + } while (u64_stats_fetch_retry(&s->syncp, start)); sum->inbytes += inbytes; sum->outbytes += outbytes; } else { @@ -80,10 +80,10 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, sum->inpkts = s->ustats.inpkts; sum->outpkts = 
s->ustats.outpkts; do { - start = u64_stats_fetch_begin_bh(&s->syncp); + start = u64_stats_fetch_begin(&s->syncp); sum->inbytes = s->ustats.inbytes; sum->outbytes = s->ustats.outbytes; - } while (u64_stats_fetch_retry_bh(&s->syncp, start)); + } while (u64_stats_fetch_retry(&s->syncp, start)); } } } -- cgit v1.1 From 6060c74a3de8ed142c78133e2829e74711f77387 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Mon, 7 Mar 2011 10:11:34 +0800 Subject: netfilter:ipvs: use kmemdup The semantic patch that makes this output is available in scripts/coccinelle/api/memdup.cocci. More information about semantic patching is available at http://coccinelle.lip6.fr/ Signed-off-by: Shan Wei Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_pe_sip.c | 9 ++++----- net/netfilter/ipvs/ip_vs_sync.c | 3 +-- 2 files changed, 5 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index 0d83bc0..13d607a 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -92,14 +92,13 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen)) return -EINVAL; - p->pe_data = kmalloc(matchlen, GFP_ATOMIC); - if (!p->pe_data) - return -ENOMEM; - /* N.B: pe_data is only set on success, * this allows fallback to the default persistence logic on failure */ - memcpy(p->pe_data, dptr + matchoff, matchlen); + p->pe_data = kmemdup(dptr + matchoff, matchlen, GFP_ATOMIC); + if (!p->pe_data) + return -ENOMEM; + p->pe_data_len = matchlen; return 0; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index fecf24d..c5d13b0 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -697,13 +697,12 @@ ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc, return 1; } - p->pe_data = kmalloc(pe_data_len, GFP_ATOMIC); + p->pe_data = kmemdup(pe_data, pe_data_len, 
GFP_ATOMIC); if (!p->pe_data) { if (p->pe->module) module_put(p->pe->module); return -ENOMEM; } - memcpy(p->pe_data, pe_data, pe_data_len); p->pe_data_len = pe_data_len; } return 0; -- cgit v1.1 From 2a0751af09c3099cf2837c623ca5d0436317d02d Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 4 Mar 2011 12:20:35 +0200 Subject: ipvs: reorganize tot_stats The global tot_stats contains cpustats field just like the stats for dest and svc, so better use it to simplify the usage in estimation_timer. As tot_stats is registered as estimator we can remove the special ip_vs_read_cpu_stats call for tot_stats. Fix ip_vs_read_cpu_stats to be called under stats lock because it is still used as synchronization between estimation timer and user context (the stats readers). Also, make sure ip_vs_stats_percpu_show reads properly the u64 stats from user context. Signed-off-by: Julian Anastasov Eric Dumazet Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 6 +++--- net/netfilter/ipvs/ip_vs_ctl.c | 45 ++++++++++++++++++++++------------------- net/netfilter/ipvs/ip_vs_est.c | 3 +-- 3 files changed, 28 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 2d1f932..6f4940e 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -132,7 +132,7 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) s->ustats.inbytes += skb->len; u64_stats_update_end(&s->syncp); - s = this_cpu_ptr(ipvs->cpustats); + s = this_cpu_ptr(ipvs->tot_stats.cpustats); s->ustats.inpkts++; u64_stats_update_begin(&s->syncp); s->ustats.inbytes += skb->len; @@ -162,7 +162,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) s->ustats.outbytes += skb->len; u64_stats_update_end(&s->syncp); - s = this_cpu_ptr(ipvs->cpustats); + s = this_cpu_ptr(ipvs->tot_stats.cpustats); s->ustats.outpkts++; u64_stats_update_begin(&s->syncp); s->ustats.outbytes += skb->len; @@ -183,7 +183,7 @@ 
ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) s = this_cpu_ptr(svc->stats.cpustats); s->ustats.conns++; - s = this_cpu_ptr(ipvs->cpustats); + s = this_cpu_ptr(ipvs->tot_stats.cpustats); s->ustats.conns++; } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index f0369d6..a2a67ad 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1481,7 +1481,7 @@ static int ip_vs_zero_all(struct net *net) } } - ip_vs_zero_stats(net_ipvs(net)->tot_stats); + ip_vs_zero_stats(&net_ipvs(net)->tot_stats); return 0; } @@ -1963,7 +1963,7 @@ static const struct file_operations ip_vs_info_fops = { static int ip_vs_stats_show(struct seq_file *seq, void *v) { struct net *net = seq_file_single_net(seq); - struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats; + struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats; /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, @@ -2007,7 +2007,8 @@ static const struct file_operations ip_vs_stats_fops = { static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) { struct net *net = seq_file_single_net(seq); - struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats; + struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats; + struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats; int i; /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ @@ -2017,11 +2018,20 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) "CPU Conns Packets Packets Bytes Bytes\n"); for_each_possible_cpu(i) { - struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i); + struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i); + unsigned int start; + __u64 inbytes, outbytes; + + do { + start = u64_stats_fetch_begin_bh(&u->syncp); + inbytes = u->ustats.inbytes; + outbytes = u->ustats.outbytes; + } while (u64_stats_fetch_retry_bh(&u->syncp, start)); + seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n", - i, u->ustats.conns, 
u->ustats.inpkts, - u->ustats.outpkts, (__u64)u->ustats.inbytes, - (__u64)u->ustats.outbytes); + i, u->ustats.conns, u->ustats.inpkts, + u->ustats.outpkts, (__u64)inbytes, + (__u64)outbytes); } spin_lock_bh(&tot_stats->lock); @@ -3505,17 +3515,12 @@ int __net_init __ip_vs_control_init(struct net *net) atomic_set(&ipvs->nullsvc_counter, 0); /* procfs stats */ - ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL); - if (ipvs->tot_stats == NULL) { - pr_err("%s(): no memory.\n", __func__); - return -ENOMEM; - } - ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats); - if (!ipvs->cpustats) { + ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); + if (!ipvs->tot_stats.cpustats) { pr_err("%s() alloc_percpu failed\n", __func__); goto err_alloc; } - spin_lock_init(&ipvs->tot_stats->lock); + spin_lock_init(&ipvs->tot_stats.lock); proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops); proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); @@ -3563,7 +3568,7 @@ int __net_init __ip_vs_control_init(struct net *net) goto err_dup; } #endif - ip_vs_new_estimator(net, ipvs->tot_stats); + ip_vs_new_estimator(net, &ipvs->tot_stats); ipvs->sysctl_tbl = tbl; /* Schedule defense work */ INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); @@ -3571,9 +3576,8 @@ int __net_init __ip_vs_control_init(struct net *net) return 0; err_dup: - free_percpu(ipvs->cpustats); + free_percpu(ipvs->tot_stats.cpustats); err_alloc: - kfree(ipvs->tot_stats); return -ENOMEM; } @@ -3582,7 +3586,7 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net) struct netns_ipvs *ipvs = net_ipvs(net); ip_vs_trash_cleanup(net); - ip_vs_kill_estimator(net, ipvs->tot_stats); + ip_vs_kill_estimator(net, &ipvs->tot_stats); cancel_delayed_work_sync(&ipvs->defense_work); cancel_work_sync(&ipvs->defense_work.work); #ifdef CONFIG_SYSCTL @@ -3591,8 +3595,7 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net) proc_net_remove(net, "ip_vs_stats_percpu"); 
proc_net_remove(net, "ip_vs_stats"); proc_net_remove(net, "ip_vs"); - free_percpu(ipvs->cpustats); - kfree(ipvs->tot_stats); + free_percpu(ipvs->tot_stats.cpustats); } static struct pernet_operations ipvs_control_ops = { diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 88bd716..b3751cf 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -101,13 +101,12 @@ static void estimation_timer(unsigned long arg) struct netns_ipvs *ipvs; ipvs = net_ipvs(net); - ip_vs_read_cpu_stats(&ipvs->tot_stats->ustats, ipvs->cpustats); spin_lock(&ipvs->est_lock); list_for_each_entry(e, &ipvs->est_list, list) { s = container_of(e, struct ip_vs_stats, est); - ip_vs_read_cpu_stats(&s->ustats, s->cpustats); spin_lock(&s->lock); + ip_vs_read_cpu_stats(&s->ustats, s->cpustats); n_conns = s->ustats.conns; n_inpkts = s->ustats.inpkts; n_outpkts = s->ustats.outpkts; -- cgit v1.1 From 55a3d4e15c7c953ecc55b96b83d2679abf8a7899 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 14 Mar 2011 01:37:49 +0200 Subject: ipvs: properly zero stats and rates Currently, the new percpu counters are not zeroed and the zero commands do not work as expected, we still show the old sum of percpu values. OTOH, we can not reset the percpu counters from user context without causing the incrementing to use old and bogus values. So, as Eric Dumazet suggested fix that by moving all overhead to stats reading in user context. Do not introduce overhead in timer context (estimator) and incrementing (packet handling in softirqs). The new ustats0 field holds the zero point for all counter values, the rates always use 0 as base value as before. When showing the values to user space just give the difference between counters and the base values. The only drawback is that percpu stats are not zeroed, they are accessible only from /proc and are new interface, so it should not be a compatibility problem as long as the sum stats are correct after zeroing. 
Signed-off-by: Julian Anastasov Acked-by: Eric Dumazet Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ctl.c | 96 ++++++++++++++++++++++++++---------------- net/netfilter/ipvs/ip_vs_est.c | 15 ++++--- 2 files changed, 68 insertions(+), 43 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index a2a67ad..804fee7 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -711,13 +711,51 @@ static void ip_vs_trash_cleanup(struct net *net) } } +static void +ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) +{ +#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c +#define IP_VS_SHOW_STATS_RATE(r) dst->r = src->ustats.r + + spin_lock_bh(&src->lock); + + IP_VS_SHOW_STATS_COUNTER(conns); + IP_VS_SHOW_STATS_COUNTER(inpkts); + IP_VS_SHOW_STATS_COUNTER(outpkts); + IP_VS_SHOW_STATS_COUNTER(inbytes); + IP_VS_SHOW_STATS_COUNTER(outbytes); + + IP_VS_SHOW_STATS_RATE(cps); + IP_VS_SHOW_STATS_RATE(inpps); + IP_VS_SHOW_STATS_RATE(outpps); + IP_VS_SHOW_STATS_RATE(inbps); + IP_VS_SHOW_STATS_RATE(outbps); + + spin_unlock_bh(&src->lock); +} static void ip_vs_zero_stats(struct ip_vs_stats *stats) { spin_lock_bh(&stats->lock); - memset(&stats->ustats, 0, sizeof(stats->ustats)); + /* get current counters as zero point, rates are zeroed */ + +#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c +#define IP_VS_ZERO_STATS_RATE(r) stats->ustats.r = 0 + + IP_VS_ZERO_STATS_COUNTER(conns); + IP_VS_ZERO_STATS_COUNTER(inpkts); + IP_VS_ZERO_STATS_COUNTER(outpkts); + IP_VS_ZERO_STATS_COUNTER(inbytes); + IP_VS_ZERO_STATS_COUNTER(outbytes); + + IP_VS_ZERO_STATS_RATE(cps); + IP_VS_ZERO_STATS_RATE(inpps); + IP_VS_ZERO_STATS_RATE(outpps); + IP_VS_ZERO_STATS_RATE(inbps); + IP_VS_ZERO_STATS_RATE(outbps); + ip_vs_zero_estimator(stats); spin_unlock_bh(&stats->lock); @@ -1963,7 +2001,7 @@ static const struct file_operations ip_vs_info_fops = { static int 
ip_vs_stats_show(struct seq_file *seq, void *v) { struct net *net = seq_file_single_net(seq); - struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats; + struct ip_vs_stats_user show; /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, @@ -1971,22 +2009,18 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v) seq_printf(seq, " Conns Packets Packets Bytes Bytes\n"); - spin_lock_bh(&tot_stats->lock); - seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns, - tot_stats->ustats.inpkts, tot_stats->ustats.outpkts, - (unsigned long long) tot_stats->ustats.inbytes, - (unsigned long long) tot_stats->ustats.outbytes); + ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats); + seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns, + show.inpkts, show.outpkts, + (unsigned long long) show.inbytes, + (unsigned long long) show.outbytes); /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); - seq_printf(seq,"%8X %8X %8X %16X %16X\n", - tot_stats->ustats.cps, - tot_stats->ustats.inpps, - tot_stats->ustats.outpps, - tot_stats->ustats.inbps, - tot_stats->ustats.outbps); - spin_unlock_bh(&tot_stats->lock); + seq_printf(seq, "%8X %8X %8X %16X %16X\n", + show.cps, show.inpps, show.outpps, + show.inbps, show.outbps); return 0; } @@ -2298,14 +2332,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) static void -ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) -{ - spin_lock_bh(&src->lock); - memcpy(dst, &src->ustats, sizeof(*dst)); - spin_unlock_bh(&src->lock); -} - -static void ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) { dst->protocol = src->protocol; @@ -2691,31 +2717,29 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = { static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, struct ip_vs_stats *stats) { + struct 
ip_vs_stats_user ustats; struct nlattr *nl_stats = nla_nest_start(skb, container_type); if (!nl_stats) return -EMSGSIZE; - spin_lock_bh(&stats->lock); - - NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts); - NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes); - NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps); - NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps); + ip_vs_copy_stats(&ustats, stats); - spin_unlock_bh(&stats->lock); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts); + NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes); + NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, ustats.cps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps); nla_nest_end(skb, nl_stats); return 0; nla_put_failure: - spin_unlock_bh(&stats->lock); nla_nest_cancel(skb, nl_stats); return -EMSGSIZE; } diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index b3751cf..a850087 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -184,13 +184,14 @@ void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats) void ip_vs_zero_estimator(struct ip_vs_stats *stats) { struct ip_vs_estimator *est = &stats->est; - - /* set counters zero, 
caller must hold the stats->lock lock */ - est->last_inbytes = 0; - est->last_outbytes = 0; - est->last_conns = 0; - est->last_inpkts = 0; - est->last_outpkts = 0; + struct ip_vs_stats_user *u = &stats->ustats; + + /* reset counters, caller must hold the stats->lock lock */ + est->last_inbytes = u->inbytes; + est->last_outbytes = u->outbytes; + est->last_conns = u->conns; + est->last_inpkts = u->inpkts; + est->last_outpkts = u->outpkts; est->cps = 0; est->inpps = 0; est->outpps = 0; -- cgit v1.1 From ea9f22cce9c2530d659f9122819940b69506b2d9 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 14 Mar 2011 01:41:54 +0200 Subject: ipvs: optimize rates reading Move the estimator reading from estimation_timer to user context. ip_vs_read_estimator() will be used to decode the rate values. As the decoded rates are not set by estimation timer there is no need to reset them in ip_vs_zero_stats. There is no need ip_vs_new_estimator() to encode stats to rates, if the destination is in trash both the stats and the rates are inactive. 
Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ctl.c | 31 ++++++++++++------------------- net/netfilter/ipvs/ip_vs_est.c | 33 +++++++++++++-------------------- 2 files changed, 25 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 804fee7..c93d806 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -715,7 +715,6 @@ static void ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) { #define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c -#define IP_VS_SHOW_STATS_RATE(r) dst->r = src->ustats.r spin_lock_bh(&src->lock); @@ -725,11 +724,7 @@ ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) IP_VS_SHOW_STATS_COUNTER(inbytes); IP_VS_SHOW_STATS_COUNTER(outbytes); - IP_VS_SHOW_STATS_RATE(cps); - IP_VS_SHOW_STATS_RATE(inpps); - IP_VS_SHOW_STATS_RATE(outpps); - IP_VS_SHOW_STATS_RATE(inbps); - IP_VS_SHOW_STATS_RATE(outbps); + ip_vs_read_estimator(dst, src); spin_unlock_bh(&src->lock); } @@ -742,7 +737,6 @@ ip_vs_zero_stats(struct ip_vs_stats *stats) /* get current counters as zero point, rates are zeroed */ #define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c -#define IP_VS_ZERO_STATS_RATE(r) stats->ustats.r = 0 IP_VS_ZERO_STATS_COUNTER(conns); IP_VS_ZERO_STATS_COUNTER(inpkts); @@ -750,12 +744,6 @@ ip_vs_zero_stats(struct ip_vs_stats *stats) IP_VS_ZERO_STATS_COUNTER(inbytes); IP_VS_ZERO_STATS_COUNTER(outbytes); - IP_VS_ZERO_STATS_RATE(cps); - IP_VS_ZERO_STATS_RATE(inpps); - IP_VS_ZERO_STATS_RATE(outpps); - IP_VS_ZERO_STATS_RATE(inbps); - IP_VS_ZERO_STATS_RATE(outbps); - ip_vs_zero_estimator(stats); spin_unlock_bh(&stats->lock); @@ -2043,6 +2031,7 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) struct net *net = seq_file_single_net(seq); struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats; struct ip_vs_cpu_stats 
*cpustats = tot_stats->cpustats; + struct ip_vs_stats_user rates; int i; /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ @@ -2069,22 +2058,26 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) } spin_lock_bh(&tot_stats->lock); + seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns, tot_stats->ustats.inpkts, tot_stats->ustats.outpkts, (unsigned long long) tot_stats->ustats.inbytes, (unsigned long long) tot_stats->ustats.outbytes); + ip_vs_read_estimator(&rates, tot_stats); + + spin_unlock_bh(&tot_stats->lock); + /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); seq_printf(seq, " %8X %8X %8X %16X %16X\n", - tot_stats->ustats.cps, - tot_stats->ustats.inpps, - tot_stats->ustats.outpps, - tot_stats->ustats.inbps, - tot_stats->ustats.outbps); - spin_unlock_bh(&tot_stats->lock); + rates.cps, + rates.inpps, + rates.outpps, + rates.inbps, + rates.outbps); return 0; } diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index a850087..fda75be 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -117,27 +117,22 @@ static void estimation_timer(unsigned long arg) rate = (n_conns - e->last_conns) << 9; e->last_conns = n_conns; e->cps += ((long)rate - (long)e->cps) >> 2; - s->ustats.cps = (e->cps + 0x1FF) >> 10; rate = (n_inpkts - e->last_inpkts) << 9; e->last_inpkts = n_inpkts; e->inpps += ((long)rate - (long)e->inpps) >> 2; - s->ustats.inpps = (e->inpps + 0x1FF) >> 10; rate = (n_outpkts - e->last_outpkts) << 9; e->last_outpkts = n_outpkts; e->outpps += ((long)rate - (long)e->outpps) >> 2; - s->ustats.outpps = (e->outpps + 0x1FF) >> 10; rate = (n_inbytes - e->last_inbytes) << 4; e->last_inbytes = n_inbytes; e->inbps += ((long)rate - (long)e->inbps) >> 2; - s->ustats.inbps = (e->inbps + 0xF) >> 5; rate = (n_outbytes - e->last_outbytes) << 4; e->last_outbytes = n_outbytes; e->outbps += 
((long)rate - (long)e->outbps) >> 2; - s->ustats.outbps = (e->outbps + 0xF) >> 5; spin_unlock(&s->lock); } spin_unlock(&ipvs->est_lock); @@ -151,21 +146,6 @@ void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats) INIT_LIST_HEAD(&est->list); - est->last_conns = stats->ustats.conns; - est->cps = stats->ustats.cps<<10; - - est->last_inpkts = stats->ustats.inpkts; - est->inpps = stats->ustats.inpps<<10; - - est->last_outpkts = stats->ustats.outpkts; - est->outpps = stats->ustats.outpps<<10; - - est->last_inbytes = stats->ustats.inbytes; - est->inbps = stats->ustats.inbps<<5; - - est->last_outbytes = stats->ustats.outbytes; - est->outbps = stats->ustats.outbps<<5; - spin_lock_bh(&ipvs->est_lock); list_add(&est->list, &ipvs->est_list); spin_unlock_bh(&ipvs->est_lock); @@ -199,6 +179,19 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats) est->outbps = 0; } +/* Get decoded rates */ +void ip_vs_read_estimator(struct ip_vs_stats_user *dst, + struct ip_vs_stats *stats) +{ + struct ip_vs_estimator *e = &stats->est; + + dst->cps = (e->cps + 0x1FF) >> 10; + dst->inpps = (e->inpps + 0x1FF) >> 10; + dst->outpps = (e->outpps + 0x1FF) >> 10; + dst->inbps = (e->inbps + 0xF) >> 5; + dst->outbps = (e->outbps + 0xF) >> 5; +} + static int __net_init __ip_vs_estimator_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); -- cgit v1.1 From 6ef757f965c9133e82116475eab7f30df391c7fa Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 14 Mar 2011 01:44:28 +0200 Subject: ipvs: rename estimator functions Rename ip_vs_new_estimator to ip_vs_start_estimator and ip_vs_kill_estimator to ip_vs_stop_estimator to better match their logic. 
Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ctl.c | 12 ++++++------ net/netfilter/ipvs/ip_vs_est.c | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c93d806..c5b1234 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -802,7 +802,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, spin_unlock(&dest->dst_lock); if (add) - ip_vs_new_estimator(svc->net, &dest->stats); + ip_vs_start_estimator(svc->net, &dest->stats); write_lock_bh(&__ip_vs_svc_lock); @@ -1008,7 +1008,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) { struct netns_ipvs *ipvs = net_ipvs(net); - ip_vs_kill_estimator(net, &dest->stats); + ip_vs_stop_estimator(net, &dest->stats); /* * Remove it from the d-linked list with the real services. @@ -1201,7 +1201,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, else if (svc->port == 0) atomic_inc(&ipvs->nullsvc_counter); - ip_vs_new_estimator(net, &svc->stats); + ip_vs_start_estimator(net, &svc->stats); /* Count only IPv4 services for old get/setsockopt interface */ if (svc->af == AF_INET) @@ -1353,7 +1353,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) if (svc->af == AF_INET) ipvs->num_services--; - ip_vs_kill_estimator(svc->net, &svc->stats); + ip_vs_stop_estimator(svc->net, &svc->stats); /* Unbind scheduler */ old_sched = svc->scheduler; @@ -3585,7 +3585,7 @@ int __net_init __ip_vs_control_init(struct net *net) goto err_dup; } #endif - ip_vs_new_estimator(net, &ipvs->tot_stats); + ip_vs_start_estimator(net, &ipvs->tot_stats); ipvs->sysctl_tbl = tbl; /* Schedule defense work */ INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); @@ -3603,7 +3603,7 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net) struct netns_ipvs *ipvs = net_ipvs(net); 
ip_vs_trash_cleanup(net); - ip_vs_kill_estimator(net, &ipvs->tot_stats); + ip_vs_stop_estimator(net, &ipvs->tot_stats); cancel_delayed_work_sync(&ipvs->defense_work); cancel_work_sync(&ipvs->defense_work.work); #ifdef CONFIG_SYSCTL diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index fda75be..8c8766c 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -139,7 +139,7 @@ static void estimation_timer(unsigned long arg) mod_timer(&ipvs->est_timer, jiffies + 2*HZ); } -void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats) +void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats) { struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_estimator *est = &stats->est; @@ -151,7 +151,7 @@ void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats) spin_unlock_bh(&ipvs->est_lock); } -void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats) +void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats) { struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_estimator *est = &stats->est; -- cgit v1.1 From ba4fd7e966fa837557a3ec846c5fd15328b212af Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 4 Feb 2011 18:33:01 +0900 Subject: IPVS: Add ip_vs_route_me_harder() Add ip_vs_route_me_harder() to avoid repeating the same code twice. 
Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 48 +++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 6f4940e..299c7f3 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -631,6 +631,24 @@ static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user) } #endif +static int ip_vs_route_me_harder(int af, struct sk_buff *skb) +{ + struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); + +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) { + if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0) + return 1; + } else +#endif + if ((ipvs->sysctl_snat_reroute || + skb_rtable(skb)->rt_flags & RTCF_LOCAL) && + ip_route_me_harder(skb, RTN_LOCAL) != 0) + return 1; + + return 0; +} + /* * Packet has been made sufficiently writable in caller * - inout: 1=in->out, 0=out->in @@ -737,7 +755,6 @@ static int handle_response_icmp(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, unsigned int offset, unsigned int ihl) { - struct netns_ipvs *ipvs; unsigned int verdict = NF_DROP; if (IP_VS_FWD_METHOD(cp) != 0) { @@ -759,8 +776,6 @@ static int handle_response_icmp(int af, struct sk_buff *skb, if (!skb_make_writable(skb, offset)) goto out; - ipvs = net_ipvs(skb_net(skb)); - #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) ip_vs_nat_icmp_v6(skb, pp, cp, 1); @@ -768,16 +783,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, #endif ip_vs_nat_icmp(skb, pp, cp, 1); -#ifdef CONFIG_IP_VS_IPV6 - if (af == AF_INET6) { - if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0) - goto out; - } else -#endif - if ((ipvs->sysctl_snat_reroute || - skb_rtable(skb)->rt_flags & RTCF_LOCAL) && - ip_route_me_harder(skb, RTN_LOCAL) != 0) - goto out; + if (ip_vs_route_me_harder(af, skb)) + goto out; /* do the statistics and put it back */ ip_vs_out_stats(cp, skb); @@ -985,7 
+992,6 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, int ihl) { struct ip_vs_protocol *pp = pd->pp; - struct netns_ipvs *ipvs; IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); @@ -1021,18 +1027,8 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * if it came from this machine itself. So re-compute * the routing information. */ - ipvs = net_ipvs(skb_net(skb)); - -#ifdef CONFIG_IP_VS_IPV6 - if (af == AF_INET6) { - if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0) - goto drop; - } else -#endif - if ((ipvs->sysctl_snat_reroute || - skb_rtable(skb)->rt_flags & RTCF_LOCAL) && - ip_route_me_harder(skb, RTN_LOCAL) != 0) - goto drop; + if (ip_vs_route_me_harder(af, skb)) + goto drop; IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); -- cgit v1.1 From 84b3cee39ff1ffc97f4f6fba8ad26786c1f6d8f5 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 4 Feb 2011 18:33:01 +0900 Subject: IPVS: Add sysctl_snat_reroute() In preparation for not including sysctl_snat_reroute in struct netns_ipvs when CONFIG_SYCTL is not defined. 
Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 299c7f3..1d8a2a2 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -599,6 +599,20 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, return NF_DROP; } +#ifdef CONFIG_SYSCTL + +static int sysctl_snat_reroute(struct sk_buff *skb) +{ + struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); + return ipvs->sysctl_snat_reroute; +} + +#else + +static int sysctl_snat_reroute(struct sk_buff *skb) { return 0; } + +#endif + __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) { return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); @@ -633,15 +647,13 @@ static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user) static int ip_vs_route_me_harder(int af, struct sk_buff *skb) { - struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); - #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0) + if (sysctl_snat_reroute(skb) && ip6_route_me_harder(skb) != 0) return 1; } else #endif - if ((ipvs->sysctl_snat_reroute || + if ((sysctl_snat_reroute(skb) || skb_rtable(skb)->rt_flags & RTCF_LOCAL) && ip_route_me_harder(skb, RTN_LOCAL) != 0) return 1; -- cgit v1.1 From 0cfa558e2c21644a0dd6c21cfadd8bbeaf9fe1a0 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 4 Feb 2011 18:33:01 +0900 Subject: IPVS: Add sysctl_nat_icmp_send() In preparation for not including sysctl_nat_icmp_send in struct netns_ipvs when CONFIG_SYSCTL is not defined.
Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 1d8a2a2..c9b8372 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -607,9 +607,16 @@ static int sysctl_snat_reroute(struct sk_buff *skb) return ipvs->sysctl_snat_reroute; } +static int sysctl_nat_icmp_send(struct net *net) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + return ipvs->sysctl_nat_icmp_send; +} + #else static int sysctl_snat_reroute(struct sk_buff *skb) { return 0; } +static int sysctl_nat_icmp_send(struct net *net) { return 0; } #endif @@ -1074,7 +1081,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; - struct netns_ipvs *ipvs; EnterFunction(11); @@ -1149,11 +1155,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) * Check if the packet belongs to an existing entry */ cp = pp->conn_out_get(af, skb, &iph, iph.len, 0); - ipvs = net_ipvs(net); if (likely(cp)) return handle_response(af, skb, pd, cp, iph.len); - if (ipvs->sysctl_nat_icmp_send && + if (sysctl_nat_icmp_send(net) && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP || pp->protocol == IPPROTO_SCTP)) { -- cgit v1.1 From 59e0350eada0516a810cb780db37746165f1d516 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 4 Feb 2011 18:33:01 +0900 Subject: IPVS: Add {sysctl_sync_threshold,period}() In preparation for not including sysctl_sync_threshold in struct netns_ipvs when CONFIG_SYSCTL is not defined.
Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 10 +++++----- net/netfilter/ipvs/ip_vs_ctl.c | 4 ++-- net/netfilter/ipvs/ip_vs_sync.c | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index c9b8372..6a0053d 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1613,15 +1613,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) */ if (cp->flags & IP_VS_CONN_F_ONE_PACKET) - pkts = ipvs->sysctl_sync_threshold[0]; + pkts = sysctl_sync_threshold(ipvs); else pkts = atomic_add_return(1, &cp->in_pkts); if ((ipvs->sync_state & IP_VS_STATE_MASTER) && cp->protocol == IPPROTO_SCTP) { if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && - (pkts % ipvs->sysctl_sync_threshold[1] - == ipvs->sysctl_sync_threshold[0])) || + (pkts % sysctl_sync_period(ipvs) + == sysctl_sync_threshold(ipvs))) || (cp->old_state != cp->state && ((cp->state == IP_VS_SCTP_S_CLOSED) || (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) || @@ -1635,8 +1635,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) else if ((ipvs->sync_state & IP_VS_STATE_MASTER) && (((cp->protocol != IPPROTO_TCP || cp->state == IP_VS_TCP_S_ESTABLISHED) && - (pkts % ipvs->sysctl_sync_threshold[1] - == ipvs->sysctl_sync_threshold[0])) || + (pkts % sysctl_sync_period(ipvs) + == sysctl_sync_threshold(ipvs))) || ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && ((cp->state == IP_VS_TCP_S_FIN_WAIT) || (cp->state == IP_VS_TCP_S_CLOSE) || diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c5b1234..364520f 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3569,8 +3569,8 @@ int __net_init __ip_vs_control_init(struct net *net) tbl[idx++].data = &ipvs->sysctl_cache_bypass; tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template; - 
ipvs->sysctl_sync_threshold[0] = 3; - ipvs->sysctl_sync_threshold[1] = 50; + ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD; + ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD; tbl[idx].data = &ipvs->sysctl_sync_threshold; tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold); tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index c5d13b0..e84987f 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -650,7 +650,7 @@ control: if (cp->flags & IP_VS_CONN_F_TEMPLATE) { int pkts = atomic_add_return(1, &cp->in_pkts); - if (pkts % ipvs->sysctl_sync_threshold[1] != 1) + if (pkts % sysctl_sync_period(ipvs) != 1) return; } goto sloop; @@ -794,7 +794,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, if (opt) memcpy(&cp->in_seq, opt, sizeof(*opt)); - atomic_set(&cp->in_pkts, ipvs->sysctl_sync_threshold[0]); + atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs)); cp->state = state; cp->old_state = cp->state; /* -- cgit v1.1 From 7532e8d40ccfdde6667169eeac4fd7778d6eb462 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 4 Feb 2011 18:33:01 +0900 Subject: IPVS: Add sysctl_sync_ver() In preparation for not including sysctl_sync_ver in struct netns_ipvs when CONFIG_SYCTL is not defined. 
Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_sync.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index e84987f..3e7961e 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -394,7 +394,7 @@ void ip_vs_sync_switch_mode(struct net *net, int mode) if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) return; - if (mode == ipvs->sysctl_sync_ver || !ipvs->sync_buff) + if (mode == sysctl_sync_ver(ipvs) || !ipvs->sync_buff) return; spin_lock_bh(&ipvs->sync_buff_lock); @@ -521,7 +521,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp) unsigned int len, pe_name_len, pad; /* Handle old version of the protocol */ - if (ipvs->sysctl_sync_ver == 0) { + if (sysctl_sync_ver(ipvs) == 0) { ip_vs_sync_conn_v0(net, cp); return; } -- cgit v1.1 From 71a8ab6cad63b4816711f2ea518755677a870f6f Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 4 Feb 2011 18:33:01 +0900 Subject: IPVS: Add sysctl_expire_nodest_conn() In preparation for not including sysctl_expire_nodest_conn in struct netns_ipvs when CONFIG_SYSCTL is not defined.
Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 6a0053d..d418bc6 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -613,10 +613,16 @@ static int sysctl_nat_icmp_send(struct net *net) return ipvs->sysctl_nat_icmp_send; } +static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) +{ + return ipvs->sysctl_expire_nodest_conn; +} + #else static int sysctl_snat_reroute(struct sk_buff *skb) { return 0; } static int sysctl_nat_icmp_send(struct net *net) { return 0; } +static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) { return 0; } #endif @@ -1583,7 +1589,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ - if (ipvs->sysctl_expire_nodest_conn) { + if (sysctl_expire_nodest_conn(ipvs)) { /* try to expire the connection immediately */ ip_vs_conn_expire_now(cp); } -- cgit v1.1 From 8e1b0b1b560019cafebe45a7d9e6ec1122fedc7b Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 4 Feb 2011 18:33:01 +0900 Subject: IPVS: Add expire_quiescent_template() In preparation for not including sysctl_expire_quiescent_template in struct netns_ipvs when CONFIG_SYSCTL is not defined. 
Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_conn.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 9c2a517..f289306 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -680,6 +680,16 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) atomic_dec(&dest->refcnt); } +static int expire_quiescent_template(struct netns_ipvs *ipvs, + struct ip_vs_dest *dest) +{ +#ifdef CONFIG_SYSCTL + return ipvs->sysctl_expire_quiescent_template && + (atomic_read(&dest->weight) == 0); +#else + return 0; +#endif +} /* * Checking if the destination of a connection template is available. @@ -696,8 +706,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct) */ if ((dest == NULL) || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || - (ipvs->sysctl_expire_quiescent_template && - (atomic_read(&dest->weight) == 0))) { + expire_quiescent_template(ipvs, dest)) { IP_VS_DBG_BUF(9, "check_template: dest not available for " "protocol %s s:%s:%d v:%s:%d " "-> d:%s:%d\n", -- cgit v1.1 From b27d777ec54205eb56cf4e873d043a426881c629 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 4 Feb 2011 18:33:01 +0900 Subject: IPVS: Conditionally use sysctl_lblc{r}_expiration In preparation for not including sysctl_lblc{r}_expiration in struct netns_ipvs when CONFIG_SYSCTL is not defined. 
Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_lblc.c | 16 +++++++++++++--- net/netfilter/ipvs/ip_vs_lblcr.c | 21 +++++++++++++++------ 2 files changed, 28 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 6bf7a80..51a27f5 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -63,6 +63,8 @@ #define CHECK_EXPIRE_INTERVAL (60*HZ) #define ENTRY_TIMEOUT (6*60*HZ) +#define DEFAULT_EXPIRATION (24*60*60*HZ) + /* * It is for full expiration check. * When there is no partial expiration check (garbage collection) @@ -238,6 +240,15 @@ static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl) } } +static int sysctl_lblc_expiration(struct ip_vs_service *svc) +{ +#ifdef CONFIG_SYSCTL + struct netns_ipvs *ipvs = net_ipvs(svc->net); + return ipvs->sysctl_lblc_expiration; +#else + return DEFAULT_EXPIRATION; +#endif +} static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) { @@ -245,7 +256,6 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) struct ip_vs_lblc_entry *en, *nxt; unsigned long now = jiffies; int i, j; - struct netns_ipvs *ipvs = net_ipvs(svc->net); for (i=0, j=tbl->rover; ibucket[j], list) { if (time_before(now, en->lastuse + - ipvs->sysctl_lblc_expiration)) + sysctl_lblc_expiration(svc))) continue; ip_vs_lblc_free(en); @@ -550,7 +560,7 @@ static int __net_init __ip_vs_lblc_init(struct net *net) return -ENOMEM; } else ipvs->lblc_ctl_table = vs_vars_table; - ipvs->sysctl_lblc_expiration = 24*60*60*HZ; + ipvs->sysctl_lblc_expiration = DEFAULT_EXPIRATION; ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration; #ifdef CONFIG_SYSCTL diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 0063176..7fb9190 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -63,6 +63,8 @@ #define CHECK_EXPIRE_INTERVAL (60*HZ) #define ENTRY_TIMEOUT (6*60*HZ) 
+#define DEFAULT_EXPIRATION (24*60*60*HZ) + /* * It is for full expiration check. * When there is no partial expiration check (garbage collection) @@ -410,6 +412,15 @@ static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl) } } +static int sysctl_lblcr_expiration(struct ip_vs_service *svc) +{ +#ifdef CONFIG_SYSCTL + struct netns_ipvs *ipvs = net_ipvs(svc->net); + return ipvs->sysctl_lblcr_expiration; +#else + return DEFAULT_EXPIRATION; +#endif +} static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) { @@ -417,15 +428,14 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) unsigned long now = jiffies; int i, j; struct ip_vs_lblcr_entry *en, *nxt; - struct netns_ipvs *ipvs = net_ipvs(svc->net); for (i=0, j=tbl->rover; isched_lock); list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { - if (time_after(en->lastuse - + ipvs->sysctl_lblcr_expiration, now)) + if (time_after(en->lastuse + + sysctl_lblcr_expiration(svc), now)) continue; ip_vs_lblcr_free(en); @@ -650,7 +660,6 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) read_lock(&svc->sched_lock); en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr); if (en) { - struct netns_ipvs *ipvs = net_ipvs(svc->net); /* We only hold a read lock, but this is atomic */ en->lastuse = jiffies; @@ -662,7 +671,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) /* More than one destination + enough time passed by, cleanup */ if (atomic_read(&en->set.size) > 1 && time_after(jiffies, en->set.lastmod + - ipvs->sysctl_lblcr_expiration)) { + sysctl_lblcr_expiration(svc))) { struct ip_vs_dest *m; write_lock(&en->set.lock); @@ -746,7 +755,7 @@ static int __net_init __ip_vs_lblcr_init(struct net *net) return -ENOMEM; } else ipvs->lblcr_ctl_table = vs_vars_table; - ipvs->sysctl_lblcr_expiration = 24*60*60*HZ; + ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION; ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration; #ifdef CONFIG_SYSCTL -- 
cgit v1.1 From a7a86b8616bc1595c4f5f109b7c39d4eb5d55e32 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 4 Feb 2011 18:33:02 +0900 Subject: IPVS: Minimise ip_vs_leave when CONFIG_SYSCTL is undefined Much of ip_vs_leave() is unnecessary if CONFIG_SYSCTL is undefined. I tried an approach of breaking the now #ifdef'ed portions out into a separate function. However this appeared to grow the compiled code on x86_64 by about 200 bytes in the case where CONFIG_SYSCTL is defined. So I have gone with the simpler though less elegant #ifdef'ed solution for now. Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_core.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index d418bc6..07accf6 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -499,11 +499,13 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_proto_data *pd) { - struct net *net; - struct netns_ipvs *ipvs; __be16 _ports[2], *pptr; struct ip_vs_iphdr iph; +#ifdef CONFIG_SYSCTL + struct net *net; + struct netns_ipvs *ipvs; int unicast; +#endif ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); @@ -512,6 +514,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, ip_vs_service_put(svc); return NF_DROP; } + +#ifdef CONFIG_SYSCTL net = skb_net(skb); #ifdef CONFIG_IP_VS_IPV6 @@ -563,6 +567,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, ip_vs_conn_put(cp); return ret; } +#endif /* * When the virtual ftp service is presented, packets destined -- cgit v1.1 From fb1de432c1c7c26afb2e86d166fc37888b6a4423 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 4 Feb 2011 18:33:02 +0900 Subject: IPVS: Conditionally define and use ip_vs_lblc{r}_table ip_vs_lblc_table and ip_vs_lblcr_table, and code that uses them are unnecessary when 
CONFIG_SYSCTL is undefined. Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_lblc.c | 15 ++++++++++----- net/netfilter/ipvs/ip_vs_lblcr.c | 14 ++++++++++---- 2 files changed, 20 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 51a27f5..f276df9 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -114,7 +114,7 @@ struct ip_vs_lblc_table { /* * IPVS LBLC sysctl table */ - +#ifdef CONFIG_SYSCTL static ctl_table vs_vars_table[] = { { .procname = "lblc_expiration", @@ -125,6 +125,7 @@ static ctl_table vs_vars_table[] = { }, { } }; +#endif static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) { @@ -548,6 +549,7 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler = /* * per netns init. */ +#ifdef CONFIG_SYSCTL static int __net_init __ip_vs_lblc_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -563,7 +565,6 @@ static int __net_init __ip_vs_lblc_init(struct net *net) ipvs->sysctl_lblc_expiration = DEFAULT_EXPIRATION; ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration; -#ifdef CONFIG_SYSCTL ipvs->lblc_ctl_header = register_net_sysctl_table(net, net_vs_ctl_path, ipvs->lblc_ctl_table); @@ -572,7 +573,6 @@ static int __net_init __ip_vs_lblc_init(struct net *net) kfree(ipvs->lblc_ctl_table); return -ENOMEM; } -#endif return 0; } @@ -581,14 +581,19 @@ static void __net_exit __ip_vs_lblc_exit(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); -#ifdef CONFIG_SYSCTL unregister_net_sysctl_table(ipvs->lblc_ctl_header); -#endif if (!net_eq(net, &init_net)) kfree(ipvs->lblc_ctl_table); } +#else + +static int __net_init __ip_vs_lblc_init(struct net *net) { return 0; } +static void __net_exit __ip_vs_lblc_exit(struct net *net) { } + +#endif + static struct pernet_operations ip_vs_lblc_ops = { .init = __ip_vs_lblc_init, .exit = __ip_vs_lblc_exit, diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c 
b/net/netfilter/ipvs/ip_vs_lblcr.c index 7fb9190..cb1c991 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -285,6 +285,7 @@ struct ip_vs_lblcr_table { }; +#ifdef CONFIG_SYSCTL /* * IPVS LBLCR sysctl table */ @@ -299,6 +300,7 @@ static ctl_table vs_vars_table[] = { }, { } }; +#endif static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) { @@ -743,6 +745,7 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler = /* * per netns init. */ +#ifdef CONFIG_SYSCTL static int __net_init __ip_vs_lblcr_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -758,7 +761,6 @@ static int __net_init __ip_vs_lblcr_init(struct net *net) ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION; ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration; -#ifdef CONFIG_SYSCTL ipvs->lblcr_ctl_header = register_net_sysctl_table(net, net_vs_ctl_path, ipvs->lblcr_ctl_table); @@ -767,7 +769,6 @@ static int __net_init __ip_vs_lblcr_init(struct net *net) kfree(ipvs->lblcr_ctl_table); return -ENOMEM; } -#endif return 0; } @@ -776,14 +777,19 @@ static void __net_exit __ip_vs_lblcr_exit(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); -#ifdef CONFIG_SYSCTL unregister_net_sysctl_table(ipvs->lblcr_ctl_header); -#endif if (!net_eq(net, &init_net)) kfree(ipvs->lblcr_ctl_table); } +#else + +static int __net_init __ip_vs_lblcr_init(struct net *net) { return 0; } +static void __net_exit __ip_vs_lblcr_exit(struct net *net) { } + +#endif + static struct pernet_operations ip_vs_lblcr_ops = { .init = __ip_vs_lblcr_init, .exit = __ip_vs_lblcr_exit, -- cgit v1.1 From 14e405461e664b777e2a5636e10b2ebf36a686ec Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 4 Feb 2011 18:33:02 +0900 Subject: IPVS: Add __ip_vs_control_{init,cleanup}_sysctl() Break out the portions of __ip_vs_control_init() and __ip_vs_control_cleanup() that aren't necessary when CONFIG_SYSCTL is undefined. 
Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ctl.c | 98 ++++++++++++++++++++++++++---------------- 1 file changed, 62 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 364520f..fa6d44c 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -88,6 +88,8 @@ static int __ip_vs_addr_is_local_v6(struct net *net, return 0; } #endif + +#ifdef CONFIG_SYSCTL /* * update_defense_level is called from keventd and from sysctl, * so it needs to protect itself from softirqs @@ -229,6 +231,7 @@ static void defense_work_handler(struct work_struct *work) ip_vs_random_dropentry(ipvs->net); schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); } +#endif int ip_vs_use_count_inc(void) @@ -1511,7 +1514,7 @@ static int ip_vs_zero_all(struct net *net) return 0; } - +#ifdef CONFIG_SYSCTL static int proc_do_defense_mode(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -1533,7 +1536,6 @@ proc_do_defense_mode(ctl_table *table, int write, return rc; } - static int proc_do_sync_threshold(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -1767,6 +1769,7 @@ const struct ctl_path net_vs_ctl_path[] = { { } }; EXPORT_SYMBOL_GPL(net_vs_ctl_path); +#endif #ifdef CONFIG_PROC_FS @@ -3511,7 +3514,8 @@ static void ip_vs_genl_unregister(void) /* * per netns intit/exit func. 
*/ -int __net_init __ip_vs_control_init(struct net *net) +#ifdef CONFIG_SYSCTL +int __net_init __ip_vs_control_init_sysctl(struct net *net) { int idx; struct netns_ipvs *ipvs = net_ipvs(net); @@ -3521,33 +3525,11 @@ int __net_init __ip_vs_control_init(struct net *net) spin_lock_init(&ipvs->dropentry_lock); spin_lock_init(&ipvs->droppacket_lock); spin_lock_init(&ipvs->securetcp_lock); - ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock); - - /* Initialize rs_table */ - for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) - INIT_LIST_HEAD(&ipvs->rs_table[idx]); - - INIT_LIST_HEAD(&ipvs->dest_trash); - atomic_set(&ipvs->ftpsvc_counter, 0); - atomic_set(&ipvs->nullsvc_counter, 0); - - /* procfs stats */ - ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); - if (!ipvs->tot_stats.cpustats) { - pr_err("%s() alloc_percpu failed\n", __func__); - goto err_alloc; - } - spin_lock_init(&ipvs->tot_stats.lock); - - proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops); - proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); - proc_net_fops_create(net, "ip_vs_stats_percpu", 0, - &ip_vs_stats_percpu_fops); if (!net_eq(net, &init_net)) { tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL); if (tbl == NULL) - goto err_dup; + return -ENOMEM; } else tbl = vs_vars; /* Initialize sysctl defaults */ @@ -3576,25 +3558,73 @@ int __net_init __ip_vs_control_init(struct net *net) tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; -#ifdef CONFIG_SYSCTL ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path, tbl); if (ipvs->sysctl_hdr == NULL) { if (!net_eq(net, &init_net)) kfree(tbl); - goto err_dup; + return -ENOMEM; } -#endif ip_vs_start_estimator(net, &ipvs->tot_stats); ipvs->sysctl_tbl = tbl; /* Schedule defense work */ INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); + return 0; +} + +void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) +{ + struct netns_ipvs *ipvs = 
net_ipvs(net); + + cancel_delayed_work_sync(&ipvs->defense_work); + cancel_work_sync(&ipvs->defense_work.work); + unregister_net_sysctl_table(ipvs->sysctl_hdr); +} -err_dup: +#else + +int __net_init __ip_vs_control_init_sysctl(struct net *net) { return 0; } +void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) { } + +#endif + +int __net_init __ip_vs_control_init(struct net *net) +{ + int idx; + struct netns_ipvs *ipvs = net_ipvs(net); + + ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock); + + /* Initialize rs_table */ + for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) + INIT_LIST_HEAD(&ipvs->rs_table[idx]); + + INIT_LIST_HEAD(&ipvs->dest_trash); + atomic_set(&ipvs->ftpsvc_counter, 0); + atomic_set(&ipvs->nullsvc_counter, 0); + + /* procfs stats */ + ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); + if (ipvs->tot_stats.cpustats) { + pr_err("%s(): alloc_percpu.\n", __func__); + return -ENOMEM; + } + spin_lock_init(&ipvs->tot_stats.lock); + + proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops); + proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); + proc_net_fops_create(net, "ip_vs_stats_percpu", 0, + &ip_vs_stats_percpu_fops); + + if (__ip_vs_control_init_sysctl(net)) + goto err; + + return 0; + +err: free_percpu(ipvs->tot_stats.cpustats); -err_alloc: return -ENOMEM; } @@ -3604,11 +3634,7 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net) ip_vs_trash_cleanup(net); ip_vs_stop_estimator(net, &ipvs->tot_stats); - cancel_delayed_work_sync(&ipvs->defense_work); - cancel_work_sync(&ipvs->defense_work.work); -#ifdef CONFIG_SYSCTL - unregister_net_sysctl_table(ipvs->sysctl_hdr); -#endif + __ip_vs_control_cleanup_sysctl(net); proc_net_remove(net, "ip_vs_stats_percpu"); proc_net_remove(net, "ip_vs_stats"); proc_net_remove(net, "ip_vs"); -- cgit v1.1 From 8183e3a88aced228ab9770762692be6cc3786e80 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Tue, 15 Mar 2011 13:23:28 +0100 Subject: netfilter: xt_connlimit: fix daddr 
connlimit in SNAT scenario We use the reply tuples when limiting the connections by the destination addresses, however, in SNAT scenario, the final reply tuples won't be ready until SNAT is done in POSTROUTING or INPUT chain, and the following nf_conntrack_find_get() in count_them() will get nothing, so connlimit can't work as expected. In this patch, the original tuples are always used, and an additional member addr is appended to save the address in either end. Signed-off-by: Changli Gao Signed-off-by: Patrick McHardy --- net/netfilter/xt_connlimit.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index e029c48..1f4b9f9 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -33,8 +33,9 @@ /* we will save the tuples of all connections we care about */ struct xt_connlimit_conn { - struct list_head list; - struct nf_conntrack_tuple tuple; + struct list_head list; + struct nf_conntrack_tuple tuple; + union nf_inet_addr addr; }; struct xt_connlimit_data { @@ -151,7 +152,7 @@ static int count_them(struct net *net, continue; } - if (same_source_net(addr, mask, &conn->tuple.src.u3, family)) + if (same_source_net(addr, mask, &conn->addr, family)) /* same source network -> be counted! 
*/ ++matches; nf_ct_put(found_ct); @@ -165,6 +166,7 @@ static int count_them(struct net *net, if (conn == NULL) return -ENOMEM; conn->tuple = *tuple; + conn->addr = *addr; list_add(&conn->list, hash); ++matches; } @@ -185,15 +187,11 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) int connections; ct = nf_ct_get(skb, &ctinfo); - if (ct != NULL) { - if (info->flags & XT_CONNLIMIT_DADDR) - tuple_ptr = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; - else - tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - } else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), - par->family, &tuple)) { + if (ct != NULL) + tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), + par->family, &tuple)) goto hotdrop; - } if (par->family == NFPROTO_IPV6) { const struct ipv6hdr *iph = ipv6_hdr(skb); -- cgit v1.1 From 0e23ca14f8e76091b402c01e2b169aba3d187b98 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Tue, 15 Mar 2011 13:24:56 +0100 Subject: netfilter: xt_connlimit: use kmalloc() instead of kzalloc() All the members are initialized after kzalloc(). Signed-off-by: Changli Gao Signed-off-by: Patrick McHardy --- net/netfilter/xt_connlimit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 1f4b9f9..ade2a80 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -162,7 +162,7 @@ static int count_them(struct net *net, if (addit) { /* save the new connection in our list */ - conn = kzalloc(sizeof(*conn), GFP_ATOMIC); + conn = kmalloc(sizeof(*conn), GFP_ATOMIC); if (conn == NULL) return -ENOMEM; conn->tuple = *tuple; -- cgit v1.1 From 3e0d5149e6dcbe7111a63773a07c5b33f7ca7236 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Tue, 15 Mar 2011 13:25:42 +0100 Subject: netfilter: xt_connlimit: use hlist instead The header of hlist is smaller than list. 
Signed-off-by: Changli Gao Signed-off-by: Patrick McHardy --- net/netfilter/xt_connlimit.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index ade2a80..da56d6e 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -33,14 +33,14 @@ /* we will save the tuples of all connections we care about */ struct xt_connlimit_conn { - struct list_head list; + struct hlist_node node; struct nf_conntrack_tuple tuple; union nf_inet_addr addr; }; struct xt_connlimit_data { - struct list_head iphash[256]; - spinlock_t lock; + struct hlist_head iphash[256]; + spinlock_t lock; }; static u_int32_t connlimit_rnd __read_mostly; @@ -102,9 +102,9 @@ static int count_them(struct net *net, { const struct nf_conntrack_tuple_hash *found; struct xt_connlimit_conn *conn; - struct xt_connlimit_conn *tmp; + struct hlist_node *pos, *n; struct nf_conn *found_ct; - struct list_head *hash; + struct hlist_head *hash; bool addit = true; int matches = 0; @@ -116,7 +116,7 @@ static int count_them(struct net *net, rcu_read_lock(); /* check the saved connections */ - list_for_each_entry_safe(conn, tmp, hash, list) { + hlist_for_each_entry_safe(conn, pos, n, hash, node) { found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE, &conn->tuple); found_ct = NULL; @@ -136,7 +136,7 @@ static int count_them(struct net *net, if (found == NULL) { /* this one is gone */ - list_del(&conn->list); + hlist_del(&conn->node); kfree(conn); continue; } @@ -147,7 +147,7 @@ static int count_them(struct net *net, * closed already -> ditch it */ nf_ct_put(found_ct); - list_del(&conn->list); + hlist_del(&conn->node); kfree(conn); continue; } @@ -167,7 +167,7 @@ static int count_them(struct net *net, return -ENOMEM; conn->tuple = *tuple; conn->addr = *addr; - list_add(&conn->list, hash); + hlist_add_head(&conn->node, hash); ++matches; } @@ -246,7 +246,7 @@ static int 
connlimit_mt_check(const struct xt_mtchk_param *par) spin_lock_init(&info->data->lock); for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) - INIT_LIST_HEAD(&info->data->iphash[i]); + INIT_HLIST_HEAD(&info->data->iphash[i]); return 0; } @@ -255,15 +255,15 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_connlimit_info *info = par->matchinfo; struct xt_connlimit_conn *conn; - struct xt_connlimit_conn *tmp; - struct list_head *hash = info->data->iphash; + struct hlist_node *pos, *n; + struct hlist_head *hash = info->data->iphash; unsigned int i; nf_ct_l3proto_module_put(par->family); for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) { - list_for_each_entry_safe(conn, tmp, &hash[i], list) { - list_del(&conn->list); + hlist_for_each_entry_safe(conn, pos, n, &hash[i], node) { + hlist_del(&conn->node); kfree(conn); } } -- cgit v1.1 From 4656c4d61adb8dc3ee04c08f57a5cc7598814420 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Tue, 15 Mar 2011 13:26:32 +0100 Subject: netfilter: xt_connlimit: remove connlimit_rnd_inited A potential race condition when generating connlimit_rnd is also fixed. 
Signed-off-by: Changli Gao Signed-off-by: Patrick McHardy --- net/netfilter/xt_connlimit.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index da56d6e..c6d5a83 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -44,7 +44,6 @@ struct xt_connlimit_data { }; static u_int32_t connlimit_rnd __read_mostly; -static bool connlimit_rnd_inited __read_mostly; static inline unsigned int connlimit_iphash(__be32 addr) { @@ -226,9 +225,13 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) unsigned int i; int ret; - if (unlikely(!connlimit_rnd_inited)) { - get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd)); - connlimit_rnd_inited = true; + if (unlikely(!connlimit_rnd)) { + u_int32_t rand; + + do { + get_random_bytes(&rand, sizeof(rand)); + } while (!rand); + cmpxchg(&connlimit_rnd, 0, rand); } ret = nf_ct_l3proto_try_module_get(par->family); if (ret < 0) { -- cgit v1.1 From 42eab94fff18cb1091d3501cd284d6bd6cc9c143 Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Tue, 15 Mar 2011 13:35:21 +0100 Subject: netfilter: arp_tables: fix infoleak to userspace Structures ipt_replace, compat_ipt_replace, and xt_get_revision are copied from userspace. Fields of these structs that are zero-terminated strings are not checked. When they are used as argument to a format string containing "%s" in request_module(), some sensitive information is leaked to userspace via argument of spawned modprobe process. The first bug was introduced before the git epoch; the second is introduced by 6b7d31fc (v2.6.15-rc1); the third is introduced by 6b7d31fc (v2.6.15-rc1). To trigger the bug one should have CAP_NET_ADMIN. 
Signed-off-by: Vasiliy Kulikov Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/arp_tables.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index e95054c..4b5d457 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1066,6 +1066,7 @@ static int do_replace(struct net *net, const void __user *user, /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) @@ -1488,6 +1489,7 @@ static int compat_do_replace(struct net *net, void __user *user, return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) @@ -1740,6 +1742,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len ret = -EFAULT; break; } + rev.name[sizeof(rev.name)-1] = 0; try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name, rev.revision, 1, &ret), -- cgit v1.1 From 78b79876761b86653df89c48a7010b5cbd41a84a Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Tue, 15 Mar 2011 13:36:05 +0100 Subject: netfilter: ip_tables: fix infoleak to userspace Structures ipt_replace, compat_ipt_replace, and xt_get_revision are copied from userspace. Fields of these structs that are zero-terminated strings are not checked. When they are used as argument to a format string containing "%s" in request_module(), some sensitive information is leaked to userspace via argument of spawned modprobe process. The first and the third bugs were introduced before the git epoch; the second was introduced in 2722971c (v2.6.17-rc1). To trigger the bug one should have CAP_NET_ADMIN. 
Signed-off-by: Vasiliy Kulikov Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_tables.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index ef7d7b9..b09ed0d 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1262,6 +1262,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) @@ -1807,6 +1808,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) @@ -2036,6 +2038,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EFAULT; break; } + rev.name[sizeof(rev.name)-1] = 0; if (cmd == IPT_SO_GET_REVISION_TARGET) target = 1; -- cgit v1.1 From 6a8ab060779779de8aea92ce3337ca348f973f54 Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Tue, 15 Mar 2011 13:37:13 +0100 Subject: ipv6: netfilter: ip6_tables: fix infoleak to userspace Structures ip6t_replace, compat_ip6t_replace, and xt_get_revision are copied from userspace. Fields of these structs that are zero-terminated strings are not checked. When they are used as argument to a format string containing "%s" in request_module(), some sensitive information is leaked to userspace via argument of spawned modprobe process. The first bug was introduced before the git epoch; the second was introduced in 3bc3fe5e (v2.6.25-rc1); the third is introduced by 6b7d31fc (v2.6.15-rc1). To trigger the bug one should have CAP_NET_ADMIN. 
Signed-off-by: Vasiliy Kulikov Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/ip6_tables.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 47b7b8d..c9598a9 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1275,6 +1275,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) @@ -1822,6 +1823,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) @@ -2051,6 +2053,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EFAULT; break; } + rev.name[sizeof(rev.name)-1] = 0; if (cmd == IP6T_SO_GET_REVISION_TARGET) target = 1; -- cgit v1.1 From de81bbea17650769882bc625d6b5df11ee7c4b24 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Mar 2011 20:16:20 +0100 Subject: netfilter: ipt_addrtype: rename to xt_addrtype Followup patch will add ipv6 support. ipt_addrtype.h is retained for compatibility reasons, but no longer used by the kernel. 
Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/Kconfig | 10 --- net/ipv4/netfilter/Makefile | 1 - net/ipv4/netfilter/ipt_addrtype.c | 134 ------------------------------------- net/netfilter/Kconfig | 10 +++ net/netfilter/Makefile | 1 + net/netfilter/xt_addrtype.c | 135 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 146 insertions(+), 145 deletions(-) delete mode 100644 net/ipv4/netfilter/ipt_addrtype.c create mode 100644 net/netfilter/xt_addrtype.c (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index f926a31..1dfc18a 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -64,16 +64,6 @@ config IP_NF_IPTABLES if IP_NF_IPTABLES # The matches. -config IP_NF_MATCH_ADDRTYPE - tristate '"addrtype" address type match support' - depends on NETFILTER_ADVANCED - help - This option allows you to match what routing thinks of an address, - eg. UNICAST, LOCAL, BROADCAST, ... - - If you want to compile it as a module, say M here and read - . If unsure, say `N'. - config IP_NF_MATCH_AH tristate '"ah" match support' depends on NETFILTER_ADVANCED diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 19eb59d..dca2082 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -48,7 +48,6 @@ obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o # matches -obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c deleted file mode 100644 index db8bff0..0000000 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ /dev/null @@ -1,134 +0,0 @@ -/* - * iptables module to match inet_addr_type() of an ip. 
- * - * Copyright (c) 2004 Patrick McHardy - * (C) 2007 Laszlo Attila Toth - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include -#include -#include -#include -#include -#include - -#include -#include - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy "); -MODULE_DESCRIPTION("Xtables: address type match for IPv4"); - -static inline bool match_type(struct net *net, const struct net_device *dev, - __be32 addr, u_int16_t mask) -{ - return !!(mask & (1 << inet_dev_addr_type(net, dev, addr))); -} - -static bool -addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) -{ - struct net *net = dev_net(par->in ? par->in : par->out); - const struct ipt_addrtype_info *info = par->matchinfo; - const struct iphdr *iph = ip_hdr(skb); - bool ret = true; - - if (info->source) - ret &= match_type(net, NULL, iph->saddr, info->source) ^ - info->invert_source; - if (info->dest) - ret &= match_type(net, NULL, iph->daddr, info->dest) ^ - info->invert_dest; - - return ret; -} - -static bool -addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) -{ - struct net *net = dev_net(par->in ? 
par->in : par->out); - const struct ipt_addrtype_info_v1 *info = par->matchinfo; - const struct iphdr *iph = ip_hdr(skb); - const struct net_device *dev = NULL; - bool ret = true; - - if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) - dev = par->in; - else if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) - dev = par->out; - - if (info->source) - ret &= match_type(net, dev, iph->saddr, info->source) ^ - (info->flags & IPT_ADDRTYPE_INVERT_SOURCE); - if (ret && info->dest) - ret &= match_type(net, dev, iph->daddr, info->dest) ^ - !!(info->flags & IPT_ADDRTYPE_INVERT_DEST); - return ret; -} - -static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) -{ - struct ipt_addrtype_info_v1 *info = par->matchinfo; - - if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN && - info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { - pr_info("both incoming and outgoing " - "interface limitation cannot be selected\n"); - return -EINVAL; - } - - if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | - (1 << NF_INET_LOCAL_IN)) && - info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { - pr_info("output interface limitation " - "not valid in PREROUTING and INPUT\n"); - return -EINVAL; - } - - if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | - (1 << NF_INET_LOCAL_OUT)) && - info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) { - pr_info("input interface limitation " - "not valid in POSTROUTING and OUTPUT\n"); - return -EINVAL; - } - - return 0; -} - -static struct xt_match addrtype_mt_reg[] __read_mostly = { - { - .name = "addrtype", - .family = NFPROTO_IPV4, - .match = addrtype_mt_v0, - .matchsize = sizeof(struct ipt_addrtype_info), - .me = THIS_MODULE - }, - { - .name = "addrtype", - .family = NFPROTO_IPV4, - .revision = 1, - .match = addrtype_mt_v1, - .checkentry = addrtype_mt_checkentry_v1, - .matchsize = sizeof(struct ipt_addrtype_info_v1), - .me = THIS_MODULE - } -}; - -static int __init addrtype_mt_init(void) -{ - return xt_register_matches(addrtype_mt_reg, - ARRAY_SIZE(addrtype_mt_reg)); -} - -static 
void __exit addrtype_mt_exit(void) -{ - xt_unregister_matches(addrtype_mt_reg, ARRAY_SIZE(addrtype_mt_reg)); -} - -module_init(addrtype_mt_init); -module_exit(addrtype_mt_exit); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 82a6e0d..32bff6d 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -649,6 +649,16 @@ config NETFILTER_XT_TARGET_TCPOPTSTRIP comment "Xtables matches" +config NETFILTER_XT_MATCH_ADDRTYPE + tristate '"addrtype" address type match support' + depends on NETFILTER_ADVANCED + ---help--- + This option allows you to match what routing thinks of an address, + eg. UNICAST, LOCAL, BROADCAST, ... + + If you want to compile it as a module, say M here and read + . If unsure, say `N'. + config NETFILTER_XT_MATCH_CLUSTER tristate '"cluster" match support' depends on NF_CONNTRACK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index d57a890..1a02853 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -70,6 +70,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o # matches +obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c new file mode 100644 index 0000000..e89c0b8 --- /dev/null +++ b/net/netfilter/xt_addrtype.c @@ -0,0 +1,135 @@ +/* + * iptables module to match inet_addr_type() of an ip. + * + * Copyright (c) 2004 Patrick McHardy + * (C) 2007 Laszlo Attila Toth + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include +#include +#include +#include + +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_DESCRIPTION("Xtables: address type match"); +MODULE_ALIAS("ipt_addrtype"); + +static inline bool match_type(struct net *net, const struct net_device *dev, + __be32 addr, u_int16_t mask) +{ + return !!(mask & (1 << inet_dev_addr_type(net, dev, addr))); +} + +static bool +addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) +{ + struct net *net = dev_net(par->in ? par->in : par->out); + const struct xt_addrtype_info *info = par->matchinfo; + const struct iphdr *iph = ip_hdr(skb); + bool ret = true; + + if (info->source) + ret &= match_type(net, NULL, iph->saddr, info->source) ^ + info->invert_source; + if (info->dest) + ret &= match_type(net, NULL, iph->daddr, info->dest) ^ + info->invert_dest; + + return ret; +} + +static bool +addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) +{ + struct net *net = dev_net(par->in ? 
par->in : par->out); + const struct xt_addrtype_info_v1 *info = par->matchinfo; + const struct iphdr *iph = ip_hdr(skb); + const struct net_device *dev = NULL; + bool ret = true; + + if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN) + dev = par->in; + else if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) + dev = par->out; + + if (info->source) + ret &= match_type(net, dev, iph->saddr, info->source) ^ + (info->flags & XT_ADDRTYPE_INVERT_SOURCE); + if (ret && info->dest) + ret &= match_type(net, dev, iph->daddr, info->dest) ^ + !!(info->flags & XT_ADDRTYPE_INVERT_DEST); + return ret; +} + +static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) +{ + struct xt_addrtype_info_v1 *info = par->matchinfo; + + if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN && + info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) { + pr_info("both incoming and outgoing " + "interface limitation cannot be selected\n"); + return -EINVAL; + } + + if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN)) && + info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) { + pr_info("output interface limitation " + "not valid in PREROUTING and INPUT\n"); + return -EINVAL; + } + + if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT)) && + info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN) { + pr_info("input interface limitation " + "not valid in POSTROUTING and OUTPUT\n"); + return -EINVAL; + } + + return 0; +} + +static struct xt_match addrtype_mt_reg[] __read_mostly = { + { + .name = "addrtype", + .family = NFPROTO_IPV4, + .match = addrtype_mt_v0, + .matchsize = sizeof(struct xt_addrtype_info), + .me = THIS_MODULE + }, + { + .name = "addrtype", + .family = NFPROTO_IPV4, + .revision = 1, + .match = addrtype_mt_v1, + .checkentry = addrtype_mt_checkentry_v1, + .matchsize = sizeof(struct xt_addrtype_info_v1), + .me = THIS_MODULE + } +}; + +static int __init addrtype_mt_init(void) +{ + return xt_register_matches(addrtype_mt_reg, + ARRAY_SIZE(addrtype_mt_reg)); +} + +static void __exit 
addrtype_mt_exit(void) +{ + xt_unregister_matches(addrtype_mt_reg, ARRAY_SIZE(addrtype_mt_reg)); +} + +module_init(addrtype_mt_init); +module_exit(addrtype_mt_exit); -- cgit v1.1 From 2f5dc63123905a89d4260ab8ee08d19ec104db04 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Mar 2011 20:17:44 +0100 Subject: netfilter: xt_addrtype: ipv6 support The kernel will refuse certain types that do not work in ipv6 mode. We can then add these features incrementally without risk of userspace breakage. Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- net/netfilter/Kconfig | 1 + net/netfilter/xt_addrtype.c | 98 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 97 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 32bff6d..c3f988a 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -652,6 +652,7 @@ comment "Xtables matches" config NETFILTER_XT_MATCH_ADDRTYPE tristate '"addrtype" address type match support' depends on NETFILTER_ADVANCED + depends on (IPV6 || IPV6=n) ---help--- This option allows you to match what routing thinks of an address, eg. UNICAST, LOCAL, BROADCAST, ... 
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index e89c0b8..2220b85 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -16,6 +16,12 @@ #include #include +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#include +#include +#include +#endif + #include #include @@ -23,6 +29,73 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); MODULE_DESCRIPTION("Xtables: address type match"); MODULE_ALIAS("ipt_addrtype"); +MODULE_ALIAS("ip6t_addrtype"); + +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +static u32 xt_addrtype_rt6_to_type(const struct rt6_info *rt) +{ + u32 ret; + + if (!rt) + return XT_ADDRTYPE_UNREACHABLE; + + if (rt->rt6i_flags & RTF_REJECT) + ret = XT_ADDRTYPE_UNREACHABLE; + else + ret = 0; + + if (rt->rt6i_flags & RTF_LOCAL) + ret |= XT_ADDRTYPE_LOCAL; + if (rt->rt6i_flags & RTF_ANYCAST) + ret |= XT_ADDRTYPE_ANYCAST; + return ret; +} + +static bool match_type6(struct net *net, const struct net_device *dev, + const struct in6_addr *addr, u16 mask) +{ + int addr_type = ipv6_addr_type(addr); + + if ((mask & XT_ADDRTYPE_MULTICAST) && + !(addr_type & IPV6_ADDR_MULTICAST)) + return false; + if ((mask & XT_ADDRTYPE_UNICAST) && !(addr_type & IPV6_ADDR_UNICAST)) + return false; + if ((mask & XT_ADDRTYPE_UNSPEC) && addr_type != IPV6_ADDR_ANY) + return false; + + if ((XT_ADDRTYPE_LOCAL | XT_ADDRTYPE_ANYCAST | + XT_ADDRTYPE_UNREACHABLE) & mask) { + struct rt6_info *rt; + u32 type; + int ifindex = dev ? 
dev->ifindex : 0; + + rt = rt6_lookup(net, addr, NULL, ifindex, !!dev); + + type = xt_addrtype_rt6_to_type(rt); + + dst_release(&rt->dst); + return !!(mask & type); + } + return true; +} + +static bool +addrtype_mt6(struct net *net, const struct net_device *dev, + const struct sk_buff *skb, const struct xt_addrtype_info_v1 *info) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + bool ret = true; + + if (info->source) + ret &= match_type6(net, dev, &iph->saddr, info->source) ^ + (info->flags & XT_ADDRTYPE_INVERT_SOURCE); + if (ret && info->dest) + ret &= match_type6(net, dev, &iph->daddr, info->dest) ^ + !!(info->flags & XT_ADDRTYPE_INVERT_DEST); + return ret; +} +#endif static inline bool match_type(struct net *net, const struct net_device *dev, __be32 addr, u_int16_t mask) @@ -53,7 +126,7 @@ addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) { struct net *net = dev_net(par->in ? par->in : par->out); const struct xt_addrtype_info_v1 *info = par->matchinfo; - const struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph; const struct net_device *dev = NULL; bool ret = true; @@ -62,6 +135,11 @@ addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) else if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) dev = par->out; +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) + if (par->family == NFPROTO_IPV6) + return addrtype_mt6(net, dev, skb, info); +#endif + iph = ip_hdr(skb); if (info->source) ret &= match_type(net, dev, iph->saddr, info->source) ^ (info->flags & XT_ADDRTYPE_INVERT_SOURCE); @@ -98,6 +176,22 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) return -EINVAL; } +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) + if (par->family == NFPROTO_IPV6) { + if ((info->source | info->dest) & XT_ADDRTYPE_BLACKHOLE) { + pr_err("ipv6 BLACKHOLE matching not supported\n"); + return -EINVAL; + } + if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) { + 
pr_err("ipv6 PROHIBT (THROW, NAT ..) matching not supported\n"); + return -EINVAL; + } + if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) { + pr_err("ipv6 does not support BROADCAST matching\n"); + return -EINVAL; + } + } +#endif return 0; } @@ -111,7 +205,7 @@ static struct xt_match addrtype_mt_reg[] __read_mostly = { }, { .name = "addrtype", - .family = NFPROTO_IPV4, + .family = NFPROTO_UNSPEC, .revision = 1, .match = addrtype_mt_v1, .checkentry = addrtype_mt_checkentry_v1, -- cgit v1.1 From 638be344593b66ccca6802c6076a5b3d9200829d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Tue, 15 Mar 2011 14:55:49 -0700 Subject: Phonet: fix aligned-mode pipe socket buffer header reserve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the pipe uses aligned-mode data packets, we must reserve 4 bytes instead of 3 for the pipe protocol header. Otherwise the Phonet header would not be aligned, resulting in potentially corrupted headers with later unaligned memory writes. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 68e635f..f17fd84 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -1055,7 +1055,7 @@ static int pep_sendmsg(struct kiocb *iocb, struct sock *sk, if (!skb) return err; - skb_reserve(skb, MAX_PHONET_HEADER + 3); + skb_reserve(skb, MAX_PHONET_HEADER + 3 + pn->aligned); err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); if (err < 0) goto outfree; -- cgit v1.1 From 7313714775a6411402f63261c05fbb4ee3d5b64a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Mar 2011 15:26:43 -0700 Subject: xfrm: fix __xfrm_route_forward() This function should return 0 in case of error, 1 if OK commit 452edd598f60522 (xfrm: Return dst directly from xfrm_lookup()) got it wrong. 
Reported-and-bisected-by: Michael Smith Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 1ba0258..027e3c6 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2175,7 +2175,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) struct net *net = dev_net(skb->dev); struct flowi fl; struct dst_entry *dst; - int res = 0; + int res = 1; if (xfrm_decode_session(skb, &fl, family) < 0) { XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); @@ -2186,7 +2186,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0); if (IS_ERR(dst)) { - res = 1; + res = 0; dst = NULL; } skb_dst_set(skb, dst); -- cgit v1.1 From 4a2b9c3756077c05dd8666e458a751d2248b61b6 Mon Sep 17 00:00:00 2001 From: Dan Siemon Date: Tue, 15 Mar 2011 13:56:07 +0000 Subject: net_sched: fix ip_tos2prio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ECN support incorrectly maps ECN BESTEFFORT packets to TC_PRIO_FILLER (1) instead of TC_PRIO_BESTEFFORT (0) This means ECN enabled flows are placed in pfifo_fast/prio low priority band, giving ECN enabled flows [ECT(0) and CE codepoints] higher drop probabilities. This is rather unfortunate, given we would like ECN being more widely used. Ref : http://www.coverfire.com/archives/2011/03/13/pfifo_fast-and-ecn/ Signed-off-by: Dan Siemon Signed-off-by: Eric Dumazet Cc: Dave Täht Cc: Jonathan Morton Signed-off-by: David S. 
Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 209989c..870b518 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -204,7 +204,7 @@ static struct dst_ops ipv4_dst_ops = { const __u8 ip_tos2prio[16] = { TC_PRIO_BESTEFFORT, - ECN_OR_COST(FILLER), + ECN_OR_COST(BESTEFFORT), TC_PRIO_BESTEFFORT, ECN_OR_COST(BESTEFFORT), TC_PRIO_BULK, -- cgit v1.1 From 400b871ba623b5e8263a3a43de7b45fab0103a57 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 16 Mar 2011 18:32:13 +0100 Subject: netfilter ebtables: fix xt_AUDIT to work with ebtables Even though ebtables uses xtables it still requires targets to return EBT_CONTINUE instead of XT_CONTINUE. This prevented xt_AUDIT from working as an ebt module. Upon Jan's suggestion, use a separate struct xt_target for NFPROTO_BRIDGE having its own target callback returning EBT_CONTINUE instead of cloning the module. Signed-off-by: Thomas Graf Signed-off-by: Patrick McHardy --- net/netfilter/xt_AUDIT.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c index 81802d2..363a99e 100644 --- a/net/netfilter/xt_AUDIT.c +++ b/net/netfilter/xt_AUDIT.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -168,6 +169,13 @@ errout: return XT_CONTINUE; } +static unsigned int +audit_tg_ebt(struct sk_buff *skb, const struct xt_action_param *par) +{ + audit_tg(skb, par); + return EBT_CONTINUE; +} + static int audit_tg_check(const struct xt_tgchk_param *par) { const struct xt_audit_info *info = par->targinfo; @@ -181,23 +189,33 @@ static int audit_tg_check(const struct xt_tgchk_param *par) return 0; } -static struct xt_target audit_tg_reg __read_mostly = { - .name = "AUDIT", - .family = NFPROTO_UNSPEC, - .target = audit_tg, - .targetsize = sizeof(struct xt_audit_info), - .checkentry = audit_tg_check, - .me = 
THIS_MODULE, +static struct xt_target audit_tg_reg[] __read_mostly = { + { + .name = "AUDIT", + .family = NFPROTO_UNSPEC, + .target = audit_tg, + .targetsize = sizeof(struct xt_audit_info), + .checkentry = audit_tg_check, + .me = THIS_MODULE, + }, + { + .name = "AUDIT", + .family = NFPROTO_BRIDGE, + .target = audit_tg_ebt, + .targetsize = sizeof(struct xt_audit_info), + .checkentry = audit_tg_check, + .me = THIS_MODULE, + }, }; static int __init audit_tg_init(void) { - return xt_register_target(&audit_tg_reg); + return xt_register_targets(audit_tg_reg, ARRAY_SIZE(audit_tg_reg)); } static void __exit audit_tg_exit(void) { - xt_unregister_target(&audit_tg_reg); + xt_unregister_targets(audit_tg_reg, ARRAY_SIZE(audit_tg_reg)); } module_init(audit_tg_init); -- cgit v1.1 From 8a4eb5734e8d1dc60a8c28576bbbdfdcc643626d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 12 Mar 2011 03:14:39 +0000 Subject: net: introduce rx_handler results and logic around that MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch allows rx_handlers to better signal to their caller what to do next. That makes skb->deliver_no_wcard no longer needed. kernel-doc for rx_handler_result is taken from Nicolas' patch. Signed-off-by: Jiri Pirko Reviewed-by: Nicolas de Pesloüan Signed-off-by: David S.
Miller --- net/bridge/br_input.c | 25 +++++++++++++++---------- net/bridge/br_private.h | 2 +- net/core/dev.c | 21 ++++++++++++++------- net/core/skbuff.c | 1 - 4 files changed, 30 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 88e4aa9..e216079 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -139,21 +139,22 @@ static inline int is_link_local(const unsigned char *dest) * Return NULL if skb is handled * note: already called with rcu_read_lock */ -struct sk_buff *br_handle_frame(struct sk_buff *skb) +rx_handler_result_t br_handle_frame(struct sk_buff **pskb) { struct net_bridge_port *p; + struct sk_buff *skb = *pskb; const unsigned char *dest = eth_hdr(skb)->h_dest; br_should_route_hook_t *rhook; if (unlikely(skb->pkt_type == PACKET_LOOPBACK)) - return skb; + return RX_HANDLER_PASS; if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) goto drop; skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) - return NULL; + return RX_HANDLER_CONSUMED; p = br_port_get_rcu(skb->dev); @@ -167,10 +168,12 @@ struct sk_buff *br_handle_frame(struct sk_buff *skb) goto forward; if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, - NULL, br_handle_local_finish)) - return NULL; /* frame consumed by filter */ - else - return skb; /* continue processing */ + NULL, br_handle_local_finish)) { + return RX_HANDLER_CONSUMED; /* consumed by filter */ + } else { + *pskb = skb; + return RX_HANDLER_PASS; /* continue processing */ + } } forward: @@ -178,8 +181,10 @@ forward: case BR_STATE_FORWARDING: rhook = rcu_dereference(br_should_route_hook); if (rhook) { - if ((*rhook)(skb)) - return skb; + if ((*rhook)(skb)) { + *pskb = skb; + return RX_HANDLER_PASS; + } dest = eth_hdr(skb)->h_dest; } /* fall through */ @@ -194,5 +199,5 @@ forward: drop: kfree_skb(skb); } - return NULL; + return RX_HANDLER_CONSUMED; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index f7afc36..19e2f46 100644 --- 
a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -379,7 +379,7 @@ extern void br_features_recompute(struct net_bridge *br); /* br_input.c */ extern int br_handle_frame_finish(struct sk_buff *skb); -extern struct sk_buff *br_handle_frame(struct sk_buff *skb); +extern rx_handler_result_t br_handle_frame(struct sk_buff **pskb); /* br_ioctl.c */ extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); diff --git a/net/core/dev.c b/net/core/dev.c index 0d39032..0b88eba 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3070,6 +3070,8 @@ out: * on a failure. * * The caller must hold the rtnl_mutex. + * + * For a general description of rx_handler, see enum rx_handler_result. */ int netdev_rx_handler_register(struct net_device *dev, rx_handler_func_t *rx_handler, @@ -3129,6 +3131,7 @@ static int __netif_receive_skb(struct sk_buff *skb) rx_handler_func_t *rx_handler; struct net_device *orig_dev; struct net_device *null_or_dev; + bool deliver_exact = false; int ret = NET_RX_DROP; __be16 type; @@ -3181,18 +3184,22 @@ ncls: rx_handler = rcu_dereference(skb->dev->rx_handler); if (rx_handler) { - struct net_device *prev_dev; - if (pt_prev) { ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; } - prev_dev = skb->dev; - skb = rx_handler(skb); - if (!skb) + switch (rx_handler(&skb)) { + case RX_HANDLER_CONSUMED: goto out; - if (skb->dev != prev_dev) + case RX_HANDLER_ANOTHER: goto another_round; + case RX_HANDLER_EXACT: + deliver_exact = true; + case RX_HANDLER_PASS: + break; + default: + BUG(); + } } if (vlan_tx_tag_present(skb)) { @@ -3210,7 +3217,7 @@ ncls: vlan_on_bond_hook(skb); /* deliver only exact match when indicated */ - null_or_dev = skb->deliver_no_wcard ? skb->dev : NULL; + null_or_dev = deliver_exact ? 
skb->dev : NULL; type = skb->protocol; list_for_each_entry_rcu(ptype, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1eb526a..801dd08 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -523,7 +523,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->ip_summed = old->ip_summed; skb_copy_queue_mapping(new, old); new->priority = old->priority; - new->deliver_no_wcard = old->deliver_no_wcard; #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) new->ipvs_property = old->ipvs_property; #endif -- cgit v1.1 From fbd5060875d25f7764fd1c3d35b83a8ed1d88d7b Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 15 Mar 2011 21:12:49 +0000 Subject: xfrm: Refcount destination entry on xfrm_lookup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We return a destination entry without refcount if a socket policy is found in xfrm_lookup. This triggers a warning on a negative refcount when freeing this dst entry. So take a refcount in this case to fix it. This refcount was forgotten when xfrm changed to cache bundles instead of policies for outgoing flows. Signed-off-by: Steffen Klassert Acked-by: Timo Teräs Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 027e3c6..15792d8 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1804,6 +1804,8 @@ restart: goto no_transform; } + dst_hold(&xdst->u.dst); + spin_lock_bh(&xfrm_policy_sk_bundle_lock); xdst->u.dst.next = xfrm_policy_sk_bundles; xfrm_policy_sk_bundles = &xdst->u.dst; -- cgit v1.1