diff options
Diffstat (limited to 'net')
43 files changed, 1403 insertions, 205 deletions
diff --git a/net/802/Kconfig b/net/802/Kconfig new file mode 100644 index 0000000..be33d27 --- /dev/null +++ b/net/802/Kconfig @@ -0,0 +1,7 @@ +config STP + tristate + select LLC + +config GARP + tristate + select STP diff --git a/net/802/Makefile b/net/802/Makefile index 68569ff..7893d67 100644 --- a/net/802/Makefile +++ b/net/802/Makefile @@ -10,3 +10,5 @@ obj-$(CONFIG_FDDI) += fddi.o obj-$(CONFIG_HIPPI) += hippi.o obj-$(CONFIG_IPX) += p8022.o psnap.o p8023.o obj-$(CONFIG_ATALK) += p8022.o psnap.o +obj-$(CONFIG_STP) += stp.o +obj-$(CONFIG_GARP) += garp.o diff --git a/net/802/garp.c b/net/802/garp.c new file mode 100644 index 0000000..3b78f7b --- /dev/null +++ b/net/802/garp.c @@ -0,0 +1,633 @@ +/* + * IEEE 802.1D Generic Attribute Registration Protocol (GARP) + * + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ +#include <linux/kernel.h> +#include <linux/timer.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/rtnetlink.h> +#include <linux/llc.h> +#include <net/llc.h> +#include <net/llc_pdu.h> +#include <net/garp.h> +#include <asm/unaligned.h> + +static unsigned int garp_join_time __read_mostly = 200; +module_param(garp_join_time, uint, 0644); +MODULE_PARM_DESC(garp_join_time, "Join time in ms (default 200ms)"); +MODULE_LICENSE("GPL"); + +static const struct garp_state_trans { + u8 state; + u8 action; +} garp_applicant_state_table[GARP_APPLICANT_MAX + 1][GARP_EVENT_MAX + 1] = { + [GARP_APPLICANT_VA] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_AA, + .action = GARP_ACTION_S_JOIN_IN }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_AA }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_LA }, + }, + [GARP_APPLICANT_AA] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_QA, + .action = GARP_ACTION_S_JOIN_IN }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_QA }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_LA }, + }, + [GARP_APPLICANT_QA] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_QA }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_LA }, + }, + [GARP_APPLICANT_LA] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_VO, + .action = GARP_ACTION_S_LEAVE_EMPTY }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_LA }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_LA }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_LA }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_VA }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_INVALID }, + }, + [GARP_APPLICANT_VP] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_AA, + .action = GARP_ACTION_S_JOIN_IN }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_AP }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_VO }, + }, + [GARP_APPLICANT_AP] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_QA, + .action = GARP_ACTION_S_JOIN_IN }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_QP }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_AO }, + }, + [GARP_APPLICANT_QP] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_QP }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_QO }, + }, + [GARP_APPLICANT_VO] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_AO }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_VP }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_INVALID }, + }, + [GARP_APPLICANT_AO] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_QO }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_AP }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_INVALID }, + }, + [GARP_APPLICANT_QO] = { + [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_INVALID }, + [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_QO }, + [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VO }, + [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_QP }, + [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_INVALID }, + }, +}; + +static int garp_attr_cmp(const struct garp_attr *attr, + const void *data, u8 len, u8 type) +{ + if (attr->type != type) + return attr->type - type; + if (attr->dlen != len) + return attr->dlen - len; + return memcmp(attr->data, data, len); +} + +static struct garp_attr *garp_attr_lookup(const struct garp_applicant *app, + const void *data, u8 len, u8 type) +{ + struct rb_node *parent = app->gid.rb_node; + struct garp_attr *attr; + int d; + + while (parent) { + attr = rb_entry(parent, struct garp_attr, node); + d = garp_attr_cmp(attr, data, len, type); + if (d < 0) + parent = parent->rb_left; + else if (d > 0) + parent = parent->rb_right; + else + return attr; + } + return NULL; +} + +static void garp_attr_insert(struct garp_applicant *app, struct garp_attr *new) +{ + struct rb_node *parent = NULL, **p = &app->gid.rb_node; + struct garp_attr *attr; + int d; + + while (*p) { + parent = *p; + attr = rb_entry(parent, struct garp_attr, node); + d = garp_attr_cmp(attr, new->data, new->dlen, new->type); + if (d < 0) + p = &parent->rb_left; + else if (d > 0) + p = &parent->rb_right; + } + rb_link_node(&new->node, parent, p); + rb_insert_color(&new->node, &app->gid); +} + +static struct garp_attr *garp_attr_create(struct garp_applicant *app, + const void *data, u8 len, u8 type) +{ + struct garp_attr *attr; + + attr = kmalloc(sizeof(*attr) + len, GFP_ATOMIC); + if (!attr) + return attr; + attr->state = GARP_APPLICANT_VO; + attr->type = type; + attr->dlen = len; + memcpy(attr->data, data, len); + garp_attr_insert(app, attr); + return attr; +} + +static void garp_attr_destroy(struct garp_applicant *app, struct garp_attr *attr) +{ + rb_erase(&attr->node, &app->gid); + kfree(attr); +} + +static int garp_pdu_init(struct garp_applicant *app) +{ + struct sk_buff *skb; + struct garp_pdu_hdr *gp; + +#define LLC_RESERVE sizeof(struct llc_pdu_un) + skb = alloc_skb(app->dev->mtu + LL_RESERVED_SPACE(app->dev), + GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + skb->dev = app->dev; + skb->protocol = htons(ETH_P_802_2); + skb_reserve(skb, LL_RESERVED_SPACE(app->dev) + LLC_RESERVE); + + gp = (struct garp_pdu_hdr *)__skb_put(skb, sizeof(*gp)); + put_unaligned(htons(GARP_PROTOCOL_ID), &gp->protocol); + + app->pdu = skb; + return 0; +} + +static int garp_pdu_append_end_mark(struct garp_applicant *app) +{ + if (skb_tailroom(app->pdu) < sizeof(u8)) + return -1; + *(u8 *)__skb_put(app->pdu, sizeof(u8)) = GARP_END_MARK; + return 0; +} + +static void garp_pdu_queue(struct garp_applicant *app) +{ + if (!app->pdu) + return; + + garp_pdu_append_end_mark(app); + garp_pdu_append_end_mark(app); + + llc_pdu_header_init(app->pdu, LLC_PDU_TYPE_U, LLC_SAP_BSPAN, + LLC_SAP_BSPAN, LLC_PDU_CMD); + llc_pdu_init_as_ui_cmd(app->pdu); + llc_mac_hdr_init(app->pdu, app->dev->dev_addr, + app->app->proto.group_address); + + skb_queue_tail(&app->queue, app->pdu); + app->pdu = NULL; +} + +static void garp_queue_xmit(struct garp_applicant *app) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(&app->queue))) + dev_queue_xmit(skb); +} + +static int garp_pdu_append_msg(struct garp_applicant *app, u8 attrtype) +{ + struct garp_msg_hdr *gm; + + if (skb_tailroom(app->pdu) < sizeof(*gm)) + return -1; + gm = (struct garp_msg_hdr *)__skb_put(app->pdu, sizeof(*gm)); + gm->attrtype = attrtype; + garp_cb(app->pdu)->cur_type = attrtype; + return 0; +} + +static int garp_pdu_append_attr(struct garp_applicant *app, + const struct garp_attr *attr, + enum garp_attr_event event) +{ + struct garp_attr_hdr *ga; + unsigned int len; + int err; +again: + if (!app->pdu) { + err = garp_pdu_init(app); + if (err < 0) + return err; + } + + if (garp_cb(app->pdu)->cur_type != attr->type) { + if (garp_cb(app->pdu)->cur_type && + garp_pdu_append_end_mark(app) < 0) + goto queue; + if (garp_pdu_append_msg(app, attr->type) < 0) + goto queue; + } + + len = sizeof(*ga) + attr->dlen; + if (skb_tailroom(app->pdu) < len) + goto queue; + ga = (struct garp_attr_hdr *)__skb_put(app->pdu, len); + ga->len = len; + ga->event = event; + memcpy(ga->data, attr->data, attr->dlen); + return 0; + +queue: + garp_pdu_queue(app); + goto again; +} + +static void garp_attr_event(struct garp_applicant *app, + struct garp_attr *attr, enum garp_event event) +{ + enum garp_applicant_state state; + + state = garp_applicant_state_table[attr->state][event].state; + if (state == GARP_APPLICANT_INVALID) + return; + + switch (garp_applicant_state_table[attr->state][event].action) { + case GARP_ACTION_NONE: + break; + case GARP_ACTION_S_JOIN_IN: + garp_pdu_append_attr(app, attr, GARP_JOIN_IN); + break; + case GARP_ACTION_S_LEAVE_EMPTY: + garp_pdu_append_attr(app, attr, GARP_LEAVE_EMPTY); + /* As a pure applicant, sending a leave message implies that + * the attribute was unregistered and can be destroyed. */ + garp_attr_destroy(app, attr); + return; + default: + WARN_ON(1); + } + + attr->state = state; +} + +int garp_request_join(const struct net_device *dev, + const struct garp_application *appl, + const void *data, u8 len, u8 type) +{ + struct garp_port *port = dev->garp_port; + struct garp_applicant *app = port->applicants[appl->type]; + struct garp_attr *attr; + + spin_lock_bh(&app->lock); + attr = garp_attr_create(app, data, len, type); + if (!attr) { + spin_unlock_bh(&app->lock); + return -ENOMEM; + } + garp_attr_event(app, attr, GARP_EVENT_REQ_JOIN); + spin_unlock_bh(&app->lock); + return 0; +} +EXPORT_SYMBOL_GPL(garp_request_join); + +void garp_request_leave(const struct net_device *dev, + const struct garp_application *appl, + const void *data, u8 len, u8 type) +{ + struct garp_port *port = dev->garp_port; + struct garp_applicant *app = port->applicants[appl->type]; + struct garp_attr *attr; + + spin_lock_bh(&app->lock); + attr = garp_attr_lookup(app, data, len, type); + if (!attr) { + spin_unlock_bh(&app->lock); + return; + } + garp_attr_event(app, attr, GARP_EVENT_REQ_LEAVE); + spin_unlock_bh(&app->lock); +} +EXPORT_SYMBOL_GPL(garp_request_leave); + +static void garp_gid_event(struct garp_applicant *app, enum garp_event event) +{ + struct rb_node *node, *next; + struct garp_attr *attr; + + for (node = rb_first(&app->gid); + next = node ? rb_next(node) : NULL, node != NULL; + node = next) { + attr = rb_entry(node, struct garp_attr, node); + garp_attr_event(app, attr, event); + } +} + +static void garp_join_timer_arm(struct garp_applicant *app) +{ + unsigned long delay; + + delay = (u64)msecs_to_jiffies(garp_join_time) * net_random() >> 32; + mod_timer(&app->join_timer, jiffies + delay); +} + +static void garp_join_timer(unsigned long data) +{ + struct garp_applicant *app = (struct garp_applicant *)data; + + spin_lock(&app->lock); + garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU); + garp_pdu_queue(app); + spin_unlock(&app->lock); + + garp_queue_xmit(app); + garp_join_timer_arm(app); +} + +static int garp_pdu_parse_end_mark(struct sk_buff *skb) +{ + if (!pskb_may_pull(skb, sizeof(u8))) + return -1; + if (*skb->data == GARP_END_MARK) { + skb_pull(skb, sizeof(u8)); + return -1; + } + return 0; +} + +static int garp_pdu_parse_attr(struct garp_applicant *app, struct sk_buff *skb, + u8 attrtype) +{ + const struct garp_attr_hdr *ga; + struct garp_attr *attr; + enum garp_event event; + unsigned int dlen; + + if (!pskb_may_pull(skb, sizeof(*ga))) + return -1; + ga = (struct garp_attr_hdr *)skb->data; + if (ga->len < sizeof(*ga)) + return -1; + + if (!pskb_may_pull(skb, ga->len)) + return -1; + skb_pull(skb, ga->len); + dlen = sizeof(*ga) - ga->len; + + if (attrtype > app->app->maxattr) + return 0; + + switch (ga->event) { + case GARP_LEAVE_ALL: + if (dlen != 0) + return -1; + garp_gid_event(app, GARP_EVENT_R_LEAVE_EMPTY); + return 0; + case GARP_JOIN_EMPTY: + event = GARP_EVENT_R_JOIN_EMPTY; + break; + case GARP_JOIN_IN: + event = GARP_EVENT_R_JOIN_IN; + break; + case GARP_LEAVE_EMPTY: + event = GARP_EVENT_R_LEAVE_EMPTY; + break; + case GARP_EMPTY: + event = GARP_EVENT_R_EMPTY; + break; + default: + return 0; + } + + if (dlen == 0) + return -1; + attr = garp_attr_lookup(app, ga->data, dlen, attrtype); + if (attr == NULL) + return 0; + garp_attr_event(app, attr, event); + return 0; +} + +static int garp_pdu_parse_msg(struct garp_applicant *app, struct sk_buff *skb) +{ + const struct garp_msg_hdr *gm; + + if (!pskb_may_pull(skb, sizeof(*gm))) + return -1; + gm = (struct garp_msg_hdr *)skb->data; + if (gm->attrtype == 0) + return -1; + skb_pull(skb, sizeof(*gm)); + + while (skb->len > 0) { + if (garp_pdu_parse_attr(app, skb, gm->attrtype) < 0) + return -1; + if (garp_pdu_parse_end_mark(skb) < 0) + break; + } + return 0; +} + +static void garp_pdu_rcv(const struct stp_proto *proto, struct sk_buff *skb, + struct net_device *dev) +{ + struct garp_application *appl = proto->data; + struct garp_port *port; + struct garp_applicant *app; + const struct garp_pdu_hdr *gp; + + port = rcu_dereference(dev->garp_port); + if (!port) + goto err; + app = rcu_dereference(port->applicants[appl->type]); + if (!app) + goto err; + + if (!pskb_may_pull(skb, sizeof(*gp))) + goto err; + gp = (struct garp_pdu_hdr *)skb->data; + if (get_unaligned(&gp->protocol) != htons(GARP_PROTOCOL_ID)) + goto err; + skb_pull(skb, sizeof(*gp)); + + spin_lock(&app->lock); + while (skb->len > 0) { + if (garp_pdu_parse_msg(app, skb) < 0) + break; + if (garp_pdu_parse_end_mark(skb) < 0) + break; + } + spin_unlock(&app->lock); +err: + kfree_skb(skb); +} + +static int garp_init_port(struct net_device *dev) +{ + struct garp_port *port; + + port = kzalloc(sizeof(*port), GFP_KERNEL); + if (!port) + return -ENOMEM; + rcu_assign_pointer(dev->garp_port, port); + return 0; +} + +static void garp_release_port(struct net_device *dev) +{ + struct garp_port *port = dev->garp_port; + unsigned int i; + + for (i = 0; i <= GARP_APPLICATION_MAX; i++) { + if (port->applicants[i]) + return; + } + rcu_assign_pointer(dev->garp_port, NULL); + synchronize_rcu(); + kfree(port); +} + +int garp_init_applicant(struct net_device *dev, struct garp_application *appl) +{ + struct garp_applicant *app; + int err; + + ASSERT_RTNL(); + + if (!dev->garp_port) { + err = garp_init_port(dev); + if (err < 0) + goto err1; + } + + err = -ENOMEM; + app = kzalloc(sizeof(*app), GFP_KERNEL); + if (!app) + goto err2; + + err = dev_mc_add(dev, appl->proto.group_address, ETH_ALEN, 0); + if (err < 0) + goto err3; + + app->dev = dev; + app->app = appl; + app->gid = RB_ROOT; + spin_lock_init(&app->lock); + skb_queue_head_init(&app->queue); + rcu_assign_pointer(dev->garp_port->applicants[appl->type], app); + setup_timer(&app->join_timer, garp_join_timer, (unsigned long)app); + garp_join_timer_arm(app); + return 0; + +err3: + kfree(app); +err2: + garp_release_port(dev); +err1: + return err; +} +EXPORT_SYMBOL_GPL(garp_init_applicant); + +void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl) +{ + struct garp_port *port = dev->garp_port; + struct garp_applicant *app = port->applicants[appl->type]; + + ASSERT_RTNL(); + + rcu_assign_pointer(port->applicants[appl->type], NULL); + synchronize_rcu(); + + /* Delete timer and generate a final TRANSMIT_PDU event to flush out + * all pending messages before the applicant is gone. */ + del_timer_sync(&app->join_timer); + garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU); + garp_pdu_queue(app); + garp_queue_xmit(app); + + dev_mc_delete(dev, appl->proto.group_address, ETH_ALEN, 0); + kfree(app); + garp_release_port(dev); +} +EXPORT_SYMBOL_GPL(garp_uninit_applicant); + +int garp_register_application(struct garp_application *appl) +{ + appl->proto.rcv = garp_pdu_rcv; + appl->proto.data = appl; + return stp_proto_register(&appl->proto); +} +EXPORT_SYMBOL_GPL(garp_register_application); + +void garp_unregister_application(struct garp_application *appl) +{ + stp_proto_unregister(&appl->proto); +} +EXPORT_SYMBOL_GPL(garp_unregister_application); diff --git a/net/802/stp.c b/net/802/stp.c new file mode 100644 index 0000000..0b7a244 --- /dev/null +++ b/net/802/stp.c @@ -0,0 +1,102 @@ +/* + * STP SAP demux + * + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ +#include <linux/mutex.h> +#include <linux/skbuff.h> +#include <linux/etherdevice.h> +#include <linux/llc.h> +#include <net/llc.h> +#include <net/llc_pdu.h> +#include <net/stp.h> + +/* 01:80:c2:00:00:20 - 01:80:c2:00:00:2F */ +#define GARP_ADDR_MIN 0x20 +#define GARP_ADDR_MAX 0x2F +#define GARP_ADDR_RANGE (GARP_ADDR_MAX - GARP_ADDR_MIN) + +static const struct stp_proto *garp_protos[GARP_ADDR_RANGE + 1] __read_mostly; +static const struct stp_proto *stp_proto __read_mostly; + +static struct llc_sap *sap __read_mostly; +static unsigned int sap_registered; +static DEFINE_MUTEX(stp_proto_mutex); + +/* Called under rcu_read_lock from LLC */ +static int stp_pdu_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + const struct ethhdr *eh = eth_hdr(skb); + const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); + const struct stp_proto *proto; + + if (pdu->ssap != LLC_SAP_BSPAN || + pdu->dsap != LLC_SAP_BSPAN || + pdu->ctrl_1 != LLC_PDU_TYPE_U) + goto err; + + if (eh->h_dest[5] >= GARP_ADDR_MIN && eh->h_dest[5] <= GARP_ADDR_MAX) { + proto = rcu_dereference(garp_protos[eh->h_dest[5] - + GARP_ADDR_MIN]); + if (proto && + compare_ether_addr(eh->h_dest, proto->group_address)) + goto err; + } else + proto = rcu_dereference(stp_proto); + + if (!proto) + goto err; + + proto->rcv(proto, skb, dev); + return 0; + +err: + kfree_skb(skb); + return 0; +} + +int stp_proto_register(const struct stp_proto *proto) +{ + int err = 0; + + mutex_lock(&stp_proto_mutex); + if (sap_registered++ == 0) { + sap = llc_sap_open(LLC_SAP_BSPAN, stp_pdu_rcv); + if (!sap) { + err = -ENOMEM; + goto out; + } + } + if (is_zero_ether_addr(proto->group_address)) + rcu_assign_pointer(stp_proto, proto); + else + rcu_assign_pointer(garp_protos[proto->group_address[5] - + GARP_ADDR_MIN], proto); +out: + mutex_unlock(&stp_proto_mutex); + return err; +} +EXPORT_SYMBOL_GPL(stp_proto_register); + +void stp_proto_unregister(const struct stp_proto *proto) +{ + mutex_lock(&stp_proto_mutex); + if (is_zero_ether_addr(proto->group_address)) + rcu_assign_pointer(stp_proto, NULL); + else + rcu_assign_pointer(garp_protos[proto->group_address[5] - + GARP_ADDR_MIN], NULL); + synchronize_rcu(); + + if (--sap_registered == 0) + llc_sap_put(sap); + mutex_unlock(&stp_proto_mutex); +} +EXPORT_SYMBOL_GPL(stp_proto_unregister); + +MODULE_LICENSE("GPL"); diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig index c4a382e..fa073a5 100644 --- a/net/8021q/Kconfig +++ b/net/8021q/Kconfig @@ -17,3 +17,13 @@ config VLAN_8021Q will be called 8021q. If unsure, say N. + +config VLAN_8021Q_GVRP + bool "GVRP (GARP VLAN Registration Protocol) support" + depends on VLAN_8021Q + select GARP + help + Select this to enable GVRP end-system support. GVRP is used for + automatic propagation of registered VLANs to switches. + + If unsure, say N. diff --git a/net/8021q/Makefile b/net/8021q/Makefile index 10ca7f4..3006e9e 100644 --- a/net/8021q/Makefile +++ b/net/8021q/Makefile @@ -4,9 +4,6 @@ obj-$(CONFIG_VLAN_8021Q) += 8021q.o -8021q-objs := vlan.o vlan_dev.o vlan_netlink.o - -ifeq ($(CONFIG_PROC_FS),y) -8021q-objs += vlanproc.o -endif - +8021q-y := vlan.o vlan_dev.o vlan_netlink.o +8021q-$(CONFIG_VLAN_8021Q_GVRP) += vlan_gvrp.o +8021q-$(CONFIG_PROC_FS) += vlanproc.o diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index ab2225d..b529110 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -165,8 +165,12 @@ void unregister_vlan_dev(struct net_device *dev) synchronize_net(); + unregister_netdevice(dev); + /* If the group is now empty, kill off the group. */ if (grp->nr_vlans == 0) { + vlan_gvrp_uninit_applicant(real_dev); + if (real_dev->features & NETIF_F_HW_VLAN_RX) real_dev->vlan_rx_register(real_dev, NULL); @@ -178,8 +182,6 @@ void unregister_vlan_dev(struct net_device *dev) /* Get rid of the vlan's reference to real_dev */ dev_put(real_dev); - - unregister_netdevice(dev); } static void vlan_transfer_operstate(const struct net_device *dev, @@ -249,15 +251,18 @@ int register_vlan_dev(struct net_device *dev) ngrp = grp = vlan_group_alloc(real_dev); if (!grp) return -ENOBUFS; + err = vlan_gvrp_init_applicant(real_dev); + if (err < 0) + goto out_free_group; } err = vlan_group_prealloc_vid(grp, vlan_id); if (err < 0) - goto out_free_group; + goto out_uninit_applicant; err = register_netdevice(dev); if (err < 0) - goto out_free_group; + goto out_uninit_applicant; /* Account for reference in struct vlan_dev_info */ dev_hold(real_dev); @@ -278,6 +283,9 @@ int register_vlan_dev(struct net_device *dev) return 0; +out_uninit_applicant: + if (ngrp) + vlan_gvrp_uninit_applicant(real_dev); out_free_group: if (ngrp) vlan_group_free(ngrp); @@ -591,9 +599,9 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg) err = -EPERM; if (!capable(CAP_NET_ADMIN)) break; - err = vlan_dev_set_vlan_flag(dev, - args.u.flag, - args.vlan_qos); + err = vlan_dev_change_flags(dev, + args.vlan_qos ? args.u.flag : 0, + args.u.flag); break; case SET_VLAN_NAME_TYPE_CMD: @@ -713,14 +721,20 @@ static int __init vlan_proto_init(void) if (err < 0) goto err2; - err = vlan_netlink_init(); + err = vlan_gvrp_init(); if (err < 0) goto err3; + err = vlan_netlink_init(); + if (err < 0) + goto err4; + dev_add_pack(&vlan_packet_type); vlan_ioctl_set(vlan_ioctl_handler); return 0; +err4: + vlan_gvrp_uninit(); err3: unregister_netdevice_notifier(&vlan_notifier_block); err2: @@ -745,8 +759,9 @@ static void __exit vlan_cleanup_module(void) BUG_ON(!hlist_empty(&vlan_group_hash[i])); unregister_pernet_gen_device(vlan_net_id, &vlan_net_ops); - synchronize_net(); + + vlan_gvrp_uninit(); } module_init(vlan_proto_init); diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 5229a72..097b2e0 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -28,8 +28,7 @@ void vlan_dev_set_ingress_priority(const struct net_device *dev, u32 skb_prio, short vlan_prio); int vlan_dev_set_egress_priority(const struct net_device *dev, u32 skb_prio, short vlan_prio); -int vlan_dev_set_vlan_flag(const struct net_device *dev, - u32 flag, short flag_val); +int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask); void vlan_dev_get_realdev_name(const struct net_device *dev, char *result); void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result); @@ -38,6 +37,22 @@ void vlan_setup(struct net_device *dev); int register_vlan_dev(struct net_device *dev); void unregister_vlan_dev(struct net_device *dev); +#ifdef CONFIG_VLAN_8021Q_GVRP +extern int vlan_gvrp_request_join(const struct net_device *dev); +extern void vlan_gvrp_request_leave(const struct net_device *dev); +extern int vlan_gvrp_init_applicant(struct net_device *dev); +extern void vlan_gvrp_uninit_applicant(struct net_device *dev); +extern int vlan_gvrp_init(void); +extern void vlan_gvrp_uninit(void); +#else +static inline int vlan_gvrp_request_join(const struct net_device *dev) { return 0; } +static inline void vlan_gvrp_request_leave(const struct net_device *dev) {} +static inline int vlan_gvrp_init_applicant(struct net_device *dev) { return 0; } +static inline void vlan_gvrp_uninit_applicant(struct net_device *dev) {} +static inline int vlan_gvrp_init(void) { return 0; } +static inline void vlan_gvrp_uninit(void) {} +#endif + int vlan_netlink_init(void); void vlan_netlink_fini(void); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 5d055c2..a0617bf 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -507,18 +507,23 @@ int vlan_dev_set_egress_priority(const struct net_device *dev, } /* Flags are defined in the vlan_flags enum in include/linux/if_vlan.h file. */ -int vlan_dev_set_vlan_flag(const struct net_device *dev, - u32 flag, short flag_val) +int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask) { - /* verify flag is supported */ - if (flag == VLAN_FLAG_REORDER_HDR) { - if (flag_val) - vlan_dev_info(dev)->flags |= VLAN_FLAG_REORDER_HDR; + struct vlan_dev_info *vlan = vlan_dev_info(dev); + u32 old_flags = vlan->flags; + + if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP)) + return -EINVAL; + + vlan->flags = (old_flags & ~mask) | (flags & mask); + + if (netif_running(dev) && (vlan->flags ^ old_flags) & VLAN_FLAG_GVRP) { + if (vlan->flags & VLAN_FLAG_GVRP) + vlan_gvrp_request_join(dev); else - vlan_dev_info(dev)->flags &= ~VLAN_FLAG_REORDER_HDR; - return 0; + vlan_gvrp_request_leave(dev); } - return -EINVAL; + return 0; } void vlan_dev_get_realdev_name(const struct net_device *dev, char *result) @@ -552,12 +557,19 @@ static int vlan_dev_open(struct net_device *dev) if (dev->flags & IFF_PROMISC) dev_set_promiscuity(real_dev, 1); + if (vlan->flags & VLAN_FLAG_GVRP) + vlan_gvrp_request_join(dev); + return 0; } static int vlan_dev_stop(struct net_device *dev) { - struct net_device *real_dev = vlan_dev_info(dev)->real_dev; + struct vlan_dev_info *vlan = vlan_dev_info(dev); + struct net_device *real_dev = vlan->real_dev; + + if (vlan->flags & VLAN_FLAG_GVRP) + vlan_gvrp_request_leave(dev); dev_mc_unsync(real_dev, dev); dev_unicast_unsync(real_dev, dev); diff --git a/net/8021q/vlan_gvrp.c b/net/8021q/vlan_gvrp.c new file mode 100644 index 0000000..db97816 --- /dev/null +++ b/net/8021q/vlan_gvrp.c @@ -0,0 +1,66 @@ +/* + * IEEE 802.1Q GARP VLAN Registration Protocol (GVRP) + * + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ +#include <linux/types.h> +#include <linux/if_vlan.h> +#include <net/garp.h> +#include "vlan.h" + +#define GARP_GVRP_ADDRESS { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x21 } + +enum gvrp_attributes { + GVRP_ATTR_INVALID, + GVRP_ATTR_VID, + __GVRP_ATTR_MAX +}; +#define GVRP_ATTR_MAX (__GVRP_ATTR_MAX - 1) + +static struct garp_application vlan_gvrp_app __read_mostly = { + .proto.group_address = GARP_GVRP_ADDRESS, + .maxattr = GVRP_ATTR_MAX, + .type = GARP_APPLICATION_GVRP, +}; + +int vlan_gvrp_request_join(const struct net_device *dev) +{ + const struct vlan_dev_info *vlan = vlan_dev_info(dev); + __be16 vid = htons(vlan->vlan_id); + + return garp_request_join(vlan->real_dev, &vlan_gvrp_app, + &vid, sizeof(vid), GVRP_ATTR_VID); +} + +void vlan_gvrp_request_leave(const struct net_device *dev) +{ + const struct vlan_dev_info *vlan = vlan_dev_info(dev); + __be16 vid = htons(vlan->vlan_id); + + garp_request_leave(vlan->real_dev, &vlan_gvrp_app, + &vid, sizeof(vid), GVRP_ATTR_VID); +} + +int vlan_gvrp_init_applicant(struct net_device *dev) +{ + return garp_init_applicant(dev, &vlan_gvrp_app); +} + +void vlan_gvrp_uninit_applicant(struct net_device *dev) +{ + garp_uninit_applicant(dev, &vlan_gvrp_app); +} + +int __init vlan_gvrp_init(void) +{ + return garp_register_application(&vlan_gvrp_app); +} + +void vlan_gvrp_uninit(void) +{ + garp_unregister_application(&vlan_gvrp_app); +} diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index c93e69e..e9c91dc 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -59,7 +59,8 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[]) } if (data[IFLA_VLAN_FLAGS]) { flags = nla_data(data[IFLA_VLAN_FLAGS]); - if ((flags->flags & flags->mask) & ~VLAN_FLAG_REORDER_HDR) + if ((flags->flags & flags->mask) & + ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP)) return -EINVAL; } @@ -75,7 +76,6 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[]) static int vlan_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct vlan_dev_info *vlan = vlan_dev_info(dev); struct ifla_vlan_flags *flags; struct ifla_vlan_qos_mapping *m; struct nlattr *attr; @@ -83,8 +83,7 @@ static int vlan_changelink(struct net_device *dev, if (data[IFLA_VLAN_FLAGS]) { flags = nla_data(data[IFLA_VLAN_FLAGS]); - vlan->flags = (vlan->flags & ~flags->mask) | - (flags->flags & flags->mask); + vlan_dev_change_flags(dev, flags->flags, flags->mask); } if (data[IFLA_VLAN_INGRESS_QOS]) { nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) { diff --git a/net/Kconfig b/net/Kconfig index acbf7c60..b986687 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -181,6 +181,7 @@ source "net/dccp/Kconfig" source "net/sctp/Kconfig" source "net/tipc/Kconfig" source "net/atm/Kconfig" +source "net/802/Kconfig" source "net/bridge/Kconfig" source "net/8021q/Kconfig" source "net/decnet/Kconfig" diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index 12265af..e143ca6 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -5,6 +5,7 @@ config BRIDGE tristate "802.1d Ethernet Bridging" select LLC + select STP ---help--- If you say Y here, then your Linux box will be able to act as an Ethernet bridge, which means that the different Ethernet segments it diff --git a/net/bridge/br.c b/net/bridge/br.c index cede010..573acdf 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -18,21 +18,24 @@ #include <linux/init.h> #include <linux/llc.h> #include <net/llc.h> +#include <net/stp.h> #include "br_private.h" int (*br_should_route_hook)(struct sk_buff *skb); -static struct llc_sap *br_stp_sap; +static const struct stp_proto br_stp_proto = { + .rcv = br_stp_rcv, +}; static int __init br_init(void) { int err; - br_stp_sap = llc_sap_open(LLC_SAP_BSPAN, br_stp_rcv); - if (!br_stp_sap) { + err = stp_proto_register(&br_stp_proto); + if (err < 0) { printk(KERN_ERR "bridge: can't register sap for STP\n"); - return -EADDRINUSE; + return err; } err = br_fdb_init(); @@ -65,13 +68,13 @@ err_out2: err_out1: br_fdb_fini(); err_out: - llc_sap_put(br_stp_sap); + stp_proto_unregister(&br_stp_proto); return err; } static void __exit br_deinit(void) { - rcu_assign_pointer(br_stp_sap->rcv_func, NULL); + stp_proto_unregister(&br_stp_proto); br_netlink_fini(); unregister_netdevice_notifier(&br_device_notifier); @@ -82,7 +85,6 @@ static void __exit br_deinit(void) synchronize_net(); br_netfilter_fini(); - llc_sap_put(br_stp_sap); br_fdb_get_hook = NULL; br_fdb_put_hook = NULL; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 8593c9f..815ed38 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -226,8 +226,9 @@ extern void br_stp_set_path_cost(struct net_bridge_port *p, extern ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id); /* br_stp_bpdu.c */ -extern int br_stp_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev); +struct stp_proto; +extern void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, + struct net_device *dev); /* br_stp_timer.c */ extern void br_stp_timer_init(struct net_bridge *br); diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 9dc2de6..9964761 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -18,6 +18,7 @@ #include <net/net_namespace.h> #include <net/llc.h> #include <net/llc_pdu.h> +#include <net/stp.h> #include <asm/unaligned.h> #include "br_private.h" @@ -131,10 +132,9 @@ void br_send_tcn_bpdu(struct net_bridge_port *p) * * NO locks, but rcu_read_lock (preempt_disabled) */ -int br_stp_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) +void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, + struct net_device *dev) { - const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); const unsigned char *dest = eth_hdr(skb)->h_dest; struct net_bridge_port *p = rcu_dereference(dev->br_port); struct net_bridge *br; @@ -146,11 +146,6 @@ int br_stp_rcv(struct sk_buff *skb, struct net_device *dev, if (!p) goto err; - if (pdu->ssap != LLC_SAP_BSPAN - || pdu->dsap != LLC_SAP_BSPAN - || pdu->ctrl_1 != LLC_PDU_TYPE_U) - goto err; - if (!pskb_may_pull(skb, 4)) goto err; @@ -224,5 +219,4 @@ int br_stp_rcv(struct sk_buff *skb, struct net_device *dev, spin_unlock(&br->lock); err: kfree_skb(skb); - return 0; } diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 0133b5e..14ada53 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -209,6 +209,36 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) return 0; } +static int ethtool_set_rxhash(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_rxnfc cmd; + + if (!dev->ethtool_ops->set_rxhash) + return -EOPNOTSUPP; + + if (copy_from_user(&cmd, useraddr, sizeof(cmd))) + return -EFAULT; + + return dev->ethtool_ops->set_rxhash(dev, &cmd); +} + +static int ethtool_get_rxhash(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_rxnfc info; + + if (!dev->ethtool_ops->get_rxhash) + return -EOPNOTSUPP; + + if (copy_from_user(&info, useraddr, sizeof(info))) + return -EFAULT; + + dev->ethtool_ops->get_rxhash(dev, &info); + + if (copy_to_user(useraddr, &info, sizeof(info))) + return -EFAULT; + return 0; +} + static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) { struct ethtool_regs regs; @@ -826,6 +856,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GGSO: case ETHTOOL_GFLAGS: case ETHTOOL_GPFLAGS: + case ETHTOOL_GRXFH: break; default: if (!capable(CAP_NET_ADMIN)) @@ -977,6 +1008,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) rc = ethtool_set_value(dev, useraddr, dev->ethtool_ops->set_priv_flags); break; + case ETHTOOL_GRXFH: + rc = ethtool_get_rxhash(dev, useraddr); + break; + case ETHTOOL_SRXFH: + rc = ethtool_set_rxhash(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index e3e9ab0..1c2943a 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -69,7 +69,7 @@ static void rules_ops_put(struct fib_rules_ops *ops) static void flush_route_cache(struct fib_rules_ops *ops) { if (ops->flush_cache) - ops->flush_cache(); + ops->flush_cache(ops); } int fib_rules_register(struct fib_rules_ops *ops) diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 5b7539b..14fbca5 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -229,7 +229,7 @@ static u32 dn_fib_rule_default_pref(struct fib_rules_ops *ops) return 0; } -static void dn_fib_rule_flush_cache(void) +static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops) { dn_rt_cache_flush(-1); } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 42bd24b..dc41133 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1479,14 +1479,15 @@ static int __init inet_init(void) * Initialise the multicast router */ #if defined(CONFIG_IP_MROUTE) - ip_mr_init(); + if (ip_mr_init()) + printk(KERN_CRIT "inet_init: Cannot init ipv4 mroute\n"); #endif /* * Initialise per-cpu ipv4 mibs */ if (init_ipv4_mibs()) - printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); ; + printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); ipv4_proc_init(); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 20c515a..29df75a 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1197,7 +1197,7 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, vo switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&arp_tbl, dev); - rt_cache_flush(0); + rt_cache_flush(dev_net(dev), 0); break; default: break; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 9de2514..2e667e2 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1348,7 +1348,7 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, dev_disable_lro(idev->dev); } rtnl_unlock(); - rt_cache_flush(0); + rt_cache_flush(net, 0); } } @@ -1362,9 +1362,10 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write, int *valp = ctl->data; int val = *valp; int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + struct net *net = ctl->extra2; if (write && *valp != val) - rt_cache_flush(0); + rt_cache_flush(net, 0); return ret; } @@ -1375,9 +1376,10 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, { int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp, newval, newlen); + struct net *net = table->extra2; if (ret == 1) - rt_cache_flush(0); + rt_cache_flush(net, 0); return ret; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 5ad01d6..65c1503 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -144,7 +144,7 @@ static void fib_flush(struct net *net) } if (flushed) - rt_cache_flush(-1); + rt_cache_flush(net, -1); } /* @@ -897,21 +897,22 @@ static void fib_disable_ip(struct net_device *dev, int force) { if (fib_sync_down_dev(dev, force)) fib_flush(dev_net(dev)); - rt_cache_flush(0); + rt_cache_flush(dev_net(dev), 0); arp_ifdown(dev); } static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) { struct in_ifaddr *ifa = (struct in_ifaddr*)ptr; + struct net_device *dev = ifa->ifa_dev->dev; switch (event) { case NETDEV_UP: fib_add_ifaddr(ifa); #ifdef CONFIG_IP_ROUTE_MULTIPATH - fib_sync_up(ifa->ifa_dev->dev); + fib_sync_up(dev); #endif - rt_cache_flush(-1); + rt_cache_flush(dev_net(dev), -1); break; case NETDEV_DOWN: fib_del_ifaddr(ifa); @@ -919,9 +920,9 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, /* Last address was deleted from this interface. Disable IP. */ - fib_disable_ip(ifa->ifa_dev->dev, 1); + fib_disable_ip(dev, 1); } else { - rt_cache_flush(-1); + rt_cache_flush(dev_net(dev), -1); } break; } @@ -949,14 +950,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo #ifdef CONFIG_IP_ROUTE_MULTIPATH fib_sync_up(dev); #endif - rt_cache_flush(-1); + rt_cache_flush(dev_net(dev), -1); break; case NETDEV_DOWN: fib_disable_ip(dev, 0); break; case NETDEV_CHANGEMTU: case NETDEV_CHANGE: - rt_cache_flush(0); + rt_cache_flush(dev_net(dev), 0); break; } return NOTIFY_DONE; diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index eeec4bf..c8cac6c 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -472,7 +472,7 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) fib_release_info(fi_drop); if (state & FA_S_ACCESSED) - rt_cache_flush(-1); + rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); rtmsg_fib(RTM_NEWROUTE, key, fa, cfg->fc_dst_len, tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); return 0; @@ -532,7 +532,7 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) if (new_f) fz->fz_nent++; - rt_cache_flush(-1); + rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id, &cfg->fc_nlinfo, 0); @@ -614,7 +614,7 @@ static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg) write_unlock_bh(&fib_hash_lock); if (fa->fa_state & FA_S_ACCESSED) - rt_cache_flush(-1); + rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); fn_free_alias(fa, f); if (kill_fn) { fn_free_node(f); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 1fb5687..6080d71 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -258,9 +258,9 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule) + nla_total_size(4); /* flow */ } -static void fib4_rule_flush_cache(void) +static void fib4_rule_flush_cache(struct fib_rules_ops *ops) { - rt_cache_flush(-1); + rt_cache_flush(ops->fro_net, -1); } static struct fib_rules_ops fib4_rules_ops_template = { diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 394db9c..d16ae46 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1271,7 +1271,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) fib_release_info(fi_drop); if (state & FA_S_ACCESSED) - rt_cache_flush(-1); + rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); @@ -1316,7 +1316,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) list_add_tail_rcu(&new_fa->fa_list, (fa ? &fa->fa_list : fa_head)); - rt_cache_flush(-1); + rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, 0); succeeded: @@ -1664,7 +1664,7 @@ static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg) trie_leaf_remove(t, l); if (fa->fa_state & FA_S_ACCESSED) - rt_cache_flush(-1); + rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 300ab0c..438fab9 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1878,16 +1878,36 @@ static struct net_protocol pim_protocol = { * Setup for IP multicast routing */ -void __init ip_mr_init(void) +int __init ip_mr_init(void) { + int err; + mrt_cachep = kmem_cache_create("ip_mrt_cache", sizeof(struct mfc_cache), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + if (!mrt_cachep) + return -ENOMEM; + setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0); - register_netdevice_notifier(&ip_mr_notifier); + err = register_netdevice_notifier(&ip_mr_notifier); + if (err) + goto reg_notif_fail; #ifdef CONFIG_PROC_FS - proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops); - proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops); + err = -ENOMEM; + if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops)) + goto proc_vif_fail; + if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops)) + goto proc_cache_fail; #endif + return 0; +reg_notif_fail: + kmem_cache_destroy(mrt_cachep); +#ifdef CONFIG_PROC_FS +proc_vif_fail: + unregister_netdevice_notifier(&ip_mr_notifier); +proc_cache_fail: + proc_net_remove(&init_net, "ip_mr_vif"); +#endif + return err; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index fe3a022..113cd25 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -132,7 +132,6 @@ static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ; static void rt_worker_func(struct work_struct *work); static DECLARE_DELAYED_WORK(expires_work, rt_worker_func); -static struct timer_list rt_secret_timer; /* * Interface to generic destination cache. @@ -251,20 +250,25 @@ static inline void rt_hash_lock_init(void) static struct rt_hash_bucket *rt_hash_table __read_mostly; static unsigned rt_hash_mask __read_mostly; static unsigned int rt_hash_log __read_mostly; -static atomic_t rt_genid __read_mostly; static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); #define RT_CACHE_STAT_INC(field) \ (__raw_get_cpu_var(rt_cache_stat).field++) -static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx) +static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx, + int genid) { return jhash_3words((__force u32)(__be32)(daddr), (__force u32)(__be32)(saddr), - idx, atomic_read(&rt_genid)) + idx, genid) & rt_hash_mask; } +static inline int rt_genid(struct net *net) +{ + return atomic_read(&net->ipv4.rt_genid); +} + #ifdef CONFIG_PROC_FS struct rt_cache_iter_state { struct seq_net_private p; @@ -334,7 +338,7 @@ static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) struct rt_cache_iter_state *st = seq->private; if (*pos) return rt_cache_get_idx(seq, *pos - 1); - st->genid = atomic_read(&rt_genid); + st->genid = rt_genid(seq_file_net(seq)); return SEQ_START_TOKEN; } @@ -681,6 +685,11 @@ static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) return dev_net(rt1->u.dst.dev) == dev_net(rt2->u.dst.dev); } +static inline int rt_is_expired(struct rtable *rth) +{ + return rth->rt_genid != rt_genid(dev_net(rth->u.dst.dev)); +} + /* * Perform a full scan of hash table and free all entries. * Can be called by a softirq or a process. @@ -690,6 +699,7 @@ static void rt_do_flush(int process_context) { unsigned int i; struct rtable *rth, *next; + struct rtable * tail; for (i = 0; i <= rt_hash_mask; i++) { if (process_context && need_resched()) @@ -699,11 +709,39 @@ static void rt_do_flush(int process_context) continue; spin_lock_bh(rt_hash_lock_addr(i)); +#ifdef CONFIG_NET_NS + { + struct rtable ** prev, * p; + + rth = rt_hash_table[i].chain; + + /* defer releasing the head of the list after spin_unlock */ + for (tail = rth; tail; tail = tail->u.dst.rt_next) + if (!rt_is_expired(tail)) + break; + if (rth != tail) + rt_hash_table[i].chain = tail; + + /* call rt_free on entries after the tail requiring flush */ + prev = &rt_hash_table[i].chain; + for (p = *prev; p; p = next) { + next = p->u.dst.rt_next; + if (!rt_is_expired(p)) { + prev = &p->u.dst.rt_next; + } else { + *prev = next; + rt_free(p); + } + } + } +#else rth = rt_hash_table[i].chain; rt_hash_table[i].chain = NULL; + tail = NULL; +#endif spin_unlock_bh(rt_hash_lock_addr(i)); - for (; rth; rth = next) { + for (; rth != tail; rth = next) { next = rth->u.dst.rt_next; rt_free(rth); } @@ -736,7 +774,7 @@ static void rt_check_expire(void) continue; spin_lock_bh(rt_hash_lock_addr(i)); while ((rth = *rthp) != NULL) { - if (rth->rt_genid != atomic_read(&rt_genid)) { + if (rt_is_expired(rth)) { *rthp = rth->u.dst.rt_next; rt_free(rth); continue; @@ -779,21 +817,21 @@ static void rt_worker_func(struct work_struct *work) * many times (2^24) without giving recent rt_genid. * Jenkins hash is strong enough that litle changes of rt_genid are OK. */ -static void rt_cache_invalidate(void) +static void rt_cache_invalidate(struct net *net) { unsigned char shuffle; get_random_bytes(&shuffle, sizeof(shuffle)); - atomic_add(shuffle + 1U, &rt_genid); + atomic_add(shuffle + 1U, &net->ipv4.rt_genid); } /* * delay < 0 : invalidate cache (fast : entries will be deleted later) * delay >= 0 : invalidate & flush cache (can be long) */ -void rt_cache_flush(int delay) +void rt_cache_flush(struct net *net, int delay) { - rt_cache_invalidate(); + rt_cache_invalidate(net); if (delay >= 0) rt_do_flush(!in_softirq()); } @@ -801,10 +839,11 @@ void rt_cache_flush(int delay) /* * We change rt_genid and let gc do the cleanup */ -static void rt_secret_rebuild(unsigned long dummy) +static void rt_secret_rebuild(unsigned long __net) { - rt_cache_invalidate(); - mod_timer(&rt_secret_timer, jiffies + ip_rt_secret_interval); + struct net *net = (struct net *)__net; + rt_cache_invalidate(net); + mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval); } /* @@ -880,7 +919,7 @@ static int rt_garbage_collect(struct dst_ops *ops) rthp = &rt_hash_table[k].chain; spin_lock_bh(rt_hash_lock_addr(k)); while ((rth = *rthp) != NULL) { - if (rth->rt_genid == atomic_read(&rt_genid) && + if (!rt_is_expired(rth) && !rt_may_expire(rth, tmo, expire)) { tmo >>= 1; rthp = &rth->u.dst.rt_next; @@ -962,7 +1001,7 @@ restart: spin_lock_bh(rt_hash_lock_addr(hash)); while ((rth = *rthp) != NULL) { - if (rth->rt_genid != atomic_read(&rt_genid)) { + if (rt_is_expired(rth)) { *rthp = rth->u.dst.rt_next; rt_free(rth); continue; @@ -1138,7 +1177,7 @@ static void rt_del(unsigned hash, struct rtable *rt) spin_lock_bh(rt_hash_lock_addr(hash)); ip_rt_put(rt); while ((aux = *rthp) != NULL) { - if (aux == rt || (aux->rt_genid != atomic_read(&rt_genid))) { + if (aux == rt || rt_is_expired(aux)) { *rthp = aux->u.dst.rt_next; rt_free(aux); continue; @@ -1180,7 +1219,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, for (i = 0; i < 2; i++) { for (k = 0; k < 2; k++) { - unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]); + unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], + rt_genid(net)); rthp=&rt_hash_table[hash].chain; @@ -1192,7 +1232,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rth->fl.fl4_src != skeys[i] || rth->fl.oif != ikeys[k] || rth->fl.iif != 0 || - rth->rt_genid != atomic_read(&rt_genid) || + rt_is_expired(rth) || !net_eq(dev_net(rth->u.dst.dev), net)) { rthp = &rth->u.dst.rt_next; continue; @@ -1231,7 +1271,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rt->u.dst.neighbour = NULL; rt->u.dst.hh = NULL; rt->u.dst.xfrm = NULL; - rt->rt_genid = atomic_read(&rt_genid); + rt->rt_genid = rt_genid(net); rt->rt_flags |= RTCF_REDIRECTED; /* Gateway is different ... */ @@ -1295,7 +1335,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) } else if ((rt->rt_flags & RTCF_REDIRECTED) || rt->u.dst.expires) { unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, - rt->fl.oif); + rt->fl.oif, + rt_genid(dev_net(dst->dev))); #if RT_CACHE_DEBUG >= 1 printk(KERN_DEBUG "ipv4_negative_advice: redirect to " NIPQUAD_FMT "/%02x dropped\n", @@ -1444,7 +1485,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, for (k = 0; k < 2; k++) { for (i = 0; i < 2; i++) { - unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]); + unsigned hash = rt_hash(daddr, skeys[i], ikeys[k], + rt_genid(net)); rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; @@ -1459,7 +1501,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, rth->fl.iif != 0 || dst_metric_locked(&rth->u.dst, RTAX_MTU) || !net_eq(dev_net(rth->u.dst.dev), net) || - rth->rt_genid != atomic_read(&rt_genid)) + !rt_is_expired(rth)) continue; if (new_mtu < 68 || new_mtu >= old_mtu) { @@ -1694,7 +1736,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->fl.oif = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; - rth->rt_genid = atomic_read(&rt_genid); + rth->rt_genid = rt_genid(dev_net(dev)); rth->rt_flags = RTCF_MULTICAST; rth->rt_type = RTN_MULTICAST; if (our) { @@ -1709,7 +1751,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, RT_CACHE_STAT_INC(in_slow_mc); in_dev_put(in_dev); - hash = rt_hash(daddr, saddr, dev->ifindex); + hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); return rt_intern_hash(hash, rth, &skb->rtable); e_nobufs: @@ -1835,7 +1877,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->u.dst.input = ip_forward; rth->u.dst.output = ip_output; - rth->rt_genid = atomic_read(&rt_genid); + rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev)); rt_set_nexthop(rth, res, itag); @@ -1870,7 +1912,8 @@ static int ip_mkroute_input(struct sk_buff *skb, return err; /* put it into the cache */ - hash = rt_hash(daddr, saddr, fl->iif); + hash = rt_hash(daddr, saddr, fl->iif, + rt_genid(dev_net(rth->u.dst.dev))); return rt_intern_hash(hash, rth, &skb->rtable); } @@ -1996,7 +2039,7 @@ local_input: goto e_nobufs; rth->u.dst.output= ip_rt_bug; - rth->rt_genid = atomic_read(&rt_genid); + rth->rt_genid = rt_genid(net); atomic_set(&rth->u.dst.__refcnt, 1); rth->u.dst.flags= DST_HOST; @@ -2026,7 +2069,7 @@ local_input: rth->rt_flags &= ~RTCF_LOCAL; } rth->rt_type = res.type; - hash = rt_hash(daddr, saddr, fl.iif); + hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); err = rt_intern_hash(hash, rth, &skb->rtable); goto done; @@ -2077,7 +2120,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, net = dev_net(dev); tos &= IPTOS_RT_MASK; - hash = rt_hash(daddr, saddr, iif); + hash = rt_hash(daddr, saddr, iif, rt_genid(net)); rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; @@ -2089,7 +2132,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, (rth->fl.fl4_tos ^ tos)) == 0 && rth->fl.mark == skb->mark && net_eq(dev_net(rth->u.dst.dev), net) && - rth->rt_genid == atomic_read(&rt_genid)) { + !rt_is_expired(rth)) { dst_use(&rth->u.dst, jiffies); RT_CACHE_STAT_INC(in_hit); rcu_read_unlock(); @@ -2217,7 +2260,7 @@ static int __mkroute_output(struct rtable **result, rth->rt_spec_dst= fl->fl4_src; rth->u.dst.output=ip_output; - rth->rt_genid = atomic_read(&rt_genid); + rth->rt_genid = rt_genid(dev_net(dev_out)); RT_CACHE_STAT_INC(out_slow_tot); @@ -2266,7 +2309,8 @@ static int ip_mkroute_output(struct rtable **rp, int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); unsigned hash; if (err == 0) { - hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif); + hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif, + rt_genid(dev_net(dev_out))); err = rt_intern_hash(hash, rth, rp); } @@ -2478,7 +2522,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, unsigned hash; struct rtable *rth; - hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif); + hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); rcu_read_lock_bh(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; @@ -2491,7 +2535,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, !((rth->fl.fl4_tos ^ flp->fl4_tos) & (IPTOS_RT_MASK | RTO_ONLINK)) && net_eq(dev_net(rth->u.dst.dev), net) && - rth->rt_genid == atomic_read(&rt_genid)) { + !rt_is_expired(rth)) { dst_use(&rth->u.dst, jiffies); RT_CACHE_STAT_INC(out_hit); rcu_read_unlock_bh(); @@ -2522,7 +2566,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { }; -static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp) +static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi *flp) { struct rtable *ort = *rp; struct rtable *rt = (struct rtable *) @@ -2546,7 +2590,7 @@ static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp) rt->idev = ort->idev; if (rt->idev) in_dev_hold(rt->idev); - rt->rt_genid = atomic_read(&rt_genid); + rt->rt_genid = rt_genid(net); rt->rt_flags = ort->rt_flags; rt->rt_type = ort->rt_type; rt->rt_dst = ort->rt_dst; @@ -2582,7 +2626,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, flags ? XFRM_LOOKUP_WAIT : 0); if (err == -EREMOTE) - err = ipv4_dst_blackhole(rp, flp); + err = ipv4_dst_blackhole(net, rp, flp); return err; } @@ -2801,7 +2845,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) rt = rcu_dereference(rt->u.dst.rt_next), idx++) { if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx) continue; - if (rt->rt_genid != atomic_read(&rt_genid)) + if (rt_is_expired(rt)) continue; skb->dst = dst_clone(&rt->u.dst); if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid, @@ -2825,19 +2869,27 @@ done: void ip_rt_multicast_event(struct in_device *in_dev) { - rt_cache_flush(0); + rt_cache_flush(dev_net(in_dev->dev), 0); } #ifdef CONFIG_SYSCTL -static int flush_delay; - static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { if (write) { + int flush_delay; + struct net *net; + static DEFINE_MUTEX(flush_mutex); + + mutex_lock(&flush_mutex); + ctl->data = &flush_delay; proc_dointvec(ctl, write, filp, buffer, lenp, ppos); - rt_cache_flush(flush_delay); + ctl->data = NULL; + mutex_unlock(&flush_mutex); + + net = (struct net *)ctl->extra1; + rt_cache_flush(net, flush_delay); return 0; } @@ -2853,25 +2905,18 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, size_t newlen) { int delay; + struct net *net; if (newlen != sizeof(int)) return -EINVAL; if (get_user(delay, (int __user *)newval)) return -EFAULT; - rt_cache_flush(delay); + net = (struct net *)table->extra1; + rt_cache_flush(net, delay); return 0; } ctl_table ipv4_route_table[] = { { - .ctl_name = NET_IPV4_ROUTE_FLUSH, - .procname = "flush", - .data = &flush_delay, - .maxlen = sizeof(int), - .mode = 0200, - .proc_handler = &ipv4_sysctl_rtcache_flush, - .strategy = &ipv4_sysctl_rtcache_flush_strategy, - }, - { .ctl_name = NET_IPV4_ROUTE_GC_THRESH, .procname = "gc_thresh", .data = &ipv4_dst_ops.gc_thresh, @@ -3009,8 +3054,97 @@ ctl_table ipv4_route_table[] = { }, { .ctl_name = 0 } }; + +static __net_initdata struct ctl_path ipv4_route_path[] = { + { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "ipv4", .ctl_name = NET_IPV4, }, + { .procname = "route", .ctl_name = NET_IPV4_ROUTE, }, + { }, +}; + + +static struct ctl_table ipv4_route_flush_table[] = { + { + .ctl_name = NET_IPV4_ROUTE_FLUSH, + .procname = "flush", + .maxlen = sizeof(int), + .mode = 0200, + .proc_handler = &ipv4_sysctl_rtcache_flush, + .strategy = &ipv4_sysctl_rtcache_flush_strategy, + }, + { .ctl_name = 0 }, +}; + +static __net_init int sysctl_route_net_init(struct net *net) +{ + struct ctl_table *tbl; + + tbl = ipv4_route_flush_table; + if (net != &init_net) { + tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL); + if (tbl == NULL) + goto err_dup; + } + tbl[0].extra1 = net; + + net->ipv4.route_hdr = + register_net_sysctl_table(net, ipv4_route_path, tbl); + if (net->ipv4.route_hdr == NULL) + goto err_reg; + return 0; + +err_reg: + if (tbl != ipv4_route_flush_table) + kfree(tbl); +err_dup: + return -ENOMEM; +} + +static __net_exit void sysctl_route_net_exit(struct net *net) +{ + struct ctl_table *tbl; + + tbl = net->ipv4.route_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->ipv4.route_hdr); + BUG_ON(tbl == ipv4_route_flush_table); + kfree(tbl); +} + +static __net_initdata struct pernet_operations sysctl_route_ops = { + .init = sysctl_route_net_init, + .exit = sysctl_route_net_exit, +}; #endif + +static __net_init int rt_secret_timer_init(struct net *net) +{ + atomic_set(&net->ipv4.rt_genid, + (int) ((num_physpages ^ (num_physpages>>8)) ^ + (jiffies ^ (jiffies >> 7)))); + + net->ipv4.rt_secret_timer.function = rt_secret_rebuild; + net->ipv4.rt_secret_timer.data = (unsigned long)net; + init_timer_deferrable(&net->ipv4.rt_secret_timer); + + net->ipv4.rt_secret_timer.expires = + jiffies + net_random() % ip_rt_secret_interval + + ip_rt_secret_interval; + add_timer(&net->ipv4.rt_secret_timer); + return 0; +} + +static __net_exit void rt_secret_timer_exit(struct net *net) +{ + del_timer_sync(&net->ipv4.rt_secret_timer); +} + +static __net_initdata struct pernet_operations rt_secret_timer_ops = { + .init = rt_secret_timer_init, + .exit = rt_secret_timer_exit, +}; + + #ifdef CONFIG_NET_CLS_ROUTE struct ip_rt_acct *ip_rt_acct __read_mostly; #endif /* CONFIG_NET_CLS_ROUTE */ @@ -3029,9 +3163,6 @@ int __init ip_rt_init(void) { int rc = 0; - atomic_set(&rt_genid, (int) ((num_physpages ^ (num_physpages>>8)) ^ - (jiffies ^ (jiffies >> 7)))); - #ifdef CONFIG_NET_CLS_ROUTE ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct)); if (!ip_rt_acct) @@ -3063,19 +3194,14 @@ int __init ip_rt_init(void) devinet_init(); ip_fib_init(); - rt_secret_timer.function = rt_secret_rebuild; - rt_secret_timer.data = 0; - init_timer_deferrable(&rt_secret_timer); - /* All the timers, started at system startup tend to synchronize. Perturb it a bit. */ schedule_delayed_work(&expires_work, net_random() % ip_rt_gc_interval + ip_rt_gc_interval); - rt_secret_timer.expires = jiffies + net_random() % ip_rt_secret_interval + - ip_rt_secret_interval; - add_timer(&rt_secret_timer); + if (register_pernet_subsys(&rt_secret_timer_ops)) + printk(KERN_ERR "Unable to setup rt_secret_timer\n"); if (ip_rt_proc_init()) printk(KERN_ERR "Unable to create route proc files\n"); @@ -3085,6 +3211,9 @@ int __init ip_rt_init(void) #endif rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL); +#ifdef CONFIG_SYSCTL + register_pernet_subsys(&sysctl_route_ops); +#endif return rc; } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 9016070..14ef202 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -793,7 +793,8 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_icmp_ratelimit, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec_ms_jiffies, + .strategy = &sysctl_ms_jiffies }, { .ctl_name = NET_IPV4_ICMP_RATEMASK, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index de30e70..d6ea970 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -947,17 +947,21 @@ static void tcp_update_reordering(struct sock *sk, const int metric, { struct tcp_sock *tp = tcp_sk(sk); if (metric > tp->reordering) { + int mib_idx; + tp->reordering = min(TCP_MAX_REORDERING, metric); /* This exciting event is worth to be remembered. 8) */ if (ts) - NET_INC_STATS_BH(LINUX_MIB_TCPTSREORDER); + mib_idx = LINUX_MIB_TCPTSREORDER; else if (tcp_is_reno(tp)) - NET_INC_STATS_BH(LINUX_MIB_TCPRENOREORDER); + mib_idx = LINUX_MIB_TCPRENOREORDER; else if (tcp_is_fack(tp)) - NET_INC_STATS_BH(LINUX_MIB_TCPFACKREORDER); + mib_idx = LINUX_MIB_TCPFACKREORDER; else - NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER); + mib_idx = LINUX_MIB_TCPSACKREORDER; + + NET_INC_STATS_BH(mib_idx); #if FASTRETRANS_DEBUG > 1 printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n", tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, @@ -1456,18 +1460,22 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, if (!tcp_is_sackblock_valid(tp, dup_sack, sp[used_sacks].start_seq, sp[used_sacks].end_seq)) { + int mib_idx; + if (dup_sack) { if (!tp->undo_marker) - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDNOUNDO); + mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO; else - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDOLD); + mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD; } else { /* Don't count olds caused by ACK reordering */ if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) && !after(sp[used_sacks].end_seq, tp->snd_una)) continue; - NET_INC_STATS_BH(LINUX_MIB_TCPSACKDISCARD); + mib_idx = LINUX_MIB_TCPSACKDISCARD; } + + NET_INC_STATS_BH(mib_idx); if (i == 0) first_sack_index = -1; continue; @@ -2380,15 +2388,19 @@ static int tcp_try_undo_recovery(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); if (tcp_may_undo(tp)) { + int mib_idx; + /* Happy end! We did not retransmit anything * or our original transmission succeeded. */ DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); tcp_undo_cwr(sk, 1); if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) - NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); + mib_idx = LINUX_MIB_TCPLOSSUNDO; else - NET_INC_STATS_BH(LINUX_MIB_TCPFULLUNDO); + mib_idx = LINUX_MIB_TCPFULLUNDO; + + NET_INC_STATS_BH(mib_idx); tp->undo_marker = 0; } if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { @@ -2560,7 +2572,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && (tcp_fackets_out(tp) > tp->reordering)); - int fast_rexmit = 0; + int fast_rexmit = 0, mib_idx; if (WARN_ON(!tp->packets_out && tp->sacked_out)) tp->sacked_out = 0; @@ -2683,9 +2695,11 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) /* Otherwise enter Recovery state */ if (tcp_is_reno(tp)) - NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERY); + mib_idx = LINUX_MIB_TCPRENORECOVERY; else - NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERY); + mib_idx = LINUX_MIB_TCPSACKRECOVERY; + + NET_INC_STATS_BH(mib_idx); tp->high_seq = tp->snd_nxt; tp->prior_ssthresh = 0; @@ -3700,10 +3714,14 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq) { if (tcp_is_sack(tp) && sysctl_tcp_dsack) { + int mib_idx; + if (before(seq, tp->rcv_nxt)) - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOLDSENT); + mib_idx = LINUX_MIB_TCPDSACKOLDSENT; else - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFOSENT); + mib_idx = LINUX_MIB_TCPDSACKOFOSENT; + + NET_INC_STATS_BH(mib_idx); tp->rx_opt.dsack = 1; tp->duplicate_sack[0].start_seq = seq; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8f83ab4..edef2af 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1985,14 +1985,17 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (sacked & TCPCB_LOST) { if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { + int mib_idx; + if (tcp_retransmit_skb(sk, skb)) { tp->retransmit_skb_hint = NULL; return; } if (icsk->icsk_ca_state != TCP_CA_Loss) - NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS); + mib_idx = LINUX_MIB_TCPFASTRETRANS; else - NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS); + mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS; + NET_INC_STATS_BH(mib_idx); if (skb == tcp_write_queue_head(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 3e358cb..6a480d1 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -326,24 +326,27 @@ static void tcp_retransmit_timer(struct sock *sk) goto out; if (icsk->icsk_retransmits == 0) { + int mib_idx; + if (icsk->icsk_ca_state == TCP_CA_Disorder || icsk->icsk_ca_state == TCP_CA_Recovery) { if (tcp_is_sack(tp)) { if (icsk->icsk_ca_state == TCP_CA_Recovery) - NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL); + mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL; else - NET_INC_STATS_BH(LINUX_MIB_TCPSACKFAILURES); + mib_idx = LINUX_MIB_TCPSACKFAILURES; } else { if (icsk->icsk_ca_state == TCP_CA_Recovery) - NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERYFAIL); + mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL; else - NET_INC_STATS_BH(LINUX_MIB_TCPRENOFAILURES); + mib_idx = LINUX_MIB_TCPRENOFAILURES; } } else if (icsk->icsk_ca_state == TCP_CA_Loss) { - NET_INC_STATS_BH(LINUX_MIB_TCPLOSSFAILURES); + mib_idx = LINUX_MIB_TCPLOSSFAILURES; } else { - NET_INC_STATS_BH(LINUX_MIB_TCPTIMEOUTS); + mib_idx = LINUX_MIB_TCPTIMEOUTS; } + NET_INC_STATS_BH(mib_idx); } if (tcp_use_frto(sk)) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 3bbf6fb..7187121 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -526,7 +526,8 @@ out: up->len = 0; up->pending = 0; if (!err) - UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_OUTDATAGRAMS, is_udplite); return err; } @@ -725,7 +726,8 @@ out: * seems like overkill. */ if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite); + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_SNDBUFERRORS, is_udplite); } return err; @@ -888,7 +890,8 @@ try_again: goto out_free; if (!peeked) - UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite); + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_INDATAGRAMS, is_udplite); sock_recv_timestamp(msg, sk, skb); @@ -917,7 +920,7 @@ out: csum_copy_err: lock_sock(sk); if (!skb_kill_datagram(sk, skb, flags)) - UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); + UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); release_sock(sk); if (noblock) @@ -988,7 +991,8 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) ret = (*up->encap_rcv)(sk, skb); if (ret <= 0) { - UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, + UDP_INC_STATS_BH(sock_net(sk), + UDP_MIB_INDATAGRAMS, is_udplite); return -ret; } @@ -1041,7 +1045,8 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) { - UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite); + UDP_INC_STATS_BH(sock_net(sk), + UDP_MIB_RCVBUFERRORS, is_udplite); atomic_inc(&sk->sk_drops); } goto drop; @@ -1050,7 +1055,7 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) return 0; drop: - UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); return -1; } @@ -1158,7 +1163,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], struct rtable *rt = (struct rtable*)skb->dst; __be32 saddr = ip_hdr(skb)->saddr; __be32 daddr = ip_hdr(skb)->daddr; - struct net *net; + struct net *net = dev_net(skb->dev); /* * Validate the packet. @@ -1180,7 +1185,6 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], if (udp4_csum_init(skb, uh, proto)) goto csum_error; - net = dev_net(skb->dev); if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) return __udp4_lib_mcast_deliver(net, skb, uh, saddr, daddr, udptable); @@ -1214,7 +1218,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], if (udp_lib_checksum_complete(skb)) goto csum_error; - UDP_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); + UDP_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); /* @@ -1248,7 +1252,7 @@ csum_error: ntohs(uh->dest), ulen); drop: - UDP_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); + UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); kfree_skb(skb); return 0; } @@ -1455,7 +1459,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) spin_lock_bh(&rcvq->lock); while ((skb = skb_peek(rcvq)) != NULL && udp_lib_checksum_complete(skb)) { - UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite); + UDP_INC_STATS_BH(sock_net(sk), + UDP_MIB_INERRORS, is_lite); __skb_unlink(skb, rcvq); kfree_skb(skb); } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 84127d8..2ec73e6 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -119,6 +119,7 @@ static void ipv6_regen_rndid(unsigned long data); static int desync_factor = MAX_DESYNC_FACTOR * HZ; #endif +static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); static int ipv6_count_addresses(struct inet6_dev *idev); /* @@ -183,6 +184,8 @@ struct ipv6_devconf ipv6_devconf __read_mostly = { #endif .proxy_ndp = 0, .accept_source_route = 0, /* we do not accept RH0 by default. */ + .disable_ipv6 = 0, + .accept_dad = 1, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -215,6 +218,8 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { #endif .proxy_ndp = 0, .accept_source_route = 0, /* we do not accept RH0 by default. */ + .disable_ipv6 = 0, + .accept_dad = 1, }; /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ @@ -378,6 +383,9 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) */ in6_dev_hold(ndev); + if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) + ndev->cnf.accept_dad = -1; + #if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) { printk(KERN_INFO @@ -578,6 +586,13 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, struct rt6_info *rt; int hash; int err = 0; + int addr_type = ipv6_addr_type(addr); + + if (addr_type == IPV6_ADDR_ANY || + addr_type & IPV6_ADDR_MULTICAST || + (!(idev->dev->flags & IFF_LOOPBACK) && + addr_type & IPV6_ADDR_LOOPBACK)) + return ERR_PTR(-EADDRNOTAVAIL); rcu_read_lock_bh(); if (idev->dead) { @@ -1412,6 +1427,20 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp) void addrconf_dad_failure(struct inet6_ifaddr *ifp) { + struct inet6_dev *idev = ifp->idev; + if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) { + struct in6_addr addr; + + addr.s6_addr32[0] = htonl(0xfe800000); + addr.s6_addr32[1] = 0; + + if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) && + ipv6_addr_equal(&ifp->addr, &addr)) { + /* DAD failed for link-local based on MAC address */ + idev->cnf.disable_ipv6 = 1; + } + } + if (net_ratelimit()) printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name); addrconf_dad_stop(ifp); @@ -2744,6 +2773,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) spin_lock_bh(&ifp->lock); if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || + idev->cnf.accept_dad < 1 || !(ifp->flags&IFA_F_TENTATIVE) || ifp->flags & IFA_F_NODAD) { ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC); @@ -2791,6 +2821,11 @@ static void addrconf_dad_timer(unsigned long data) read_unlock_bh(&idev->lock); goto out; } + if (idev->cnf.accept_dad > 1 && idev->cnf.disable_ipv6) { + read_unlock_bh(&idev->lock); + addrconf_dad_failure(ifp); + return; + } spin_lock_bh(&ifp->lock); if (ifp->probes == 0) { /* @@ -3650,6 +3685,8 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, #ifdef CONFIG_IPV6_MROUTE array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding; #endif + array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6; + array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad; } static inline size_t inet6_if_nlmsg_size(void) @@ -4209,6 +4246,22 @@ static struct addrconf_sysctl_table }, #endif { + .ctl_name = CTL_UNNUMBERED, + .procname = "disable_ipv6", + .data = &ipv6_devconf.disable_ipv6, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "accept_dad", + .data = &ipv6_devconf.accept_dad, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = 0, /* sentinel */ } }, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3ce8d2f..3d828bc 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -59,9 +59,7 @@ #include <asm/uaccess.h> #include <asm/system.h> -#ifdef CONFIG_IPV6_MROUTE #include <linux/mroute6.h> -#endif MODULE_AUTHOR("Cast of dozens"); MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); @@ -952,9 +950,9 @@ static int __init inet6_init(void) err = icmpv6_init(); if (err) goto icmp_fail; -#ifdef CONFIG_IPV6_MROUTE - ip6_mr_init(); -#endif + err = ip6_mr_init(); + if (err) + goto ipmr_fail; err = ndisc_init(); if (err) goto ndisc_fail; @@ -1057,6 +1055,8 @@ netfilter_fail: igmp_fail: ndisc_cleanup(); ndisc_fail: + ip6_mr_cleanup(); +ipmr_fail: icmpv6_cleanup(); icmp_fail: unregister_pernet_subsys(&inet6_net_ops); @@ -1111,6 +1111,7 @@ static void __exit inet6_exit(void) ipv6_netfilter_fini(); igmp6_cleanup(); ndisc_cleanup(); + ip6_mr_cleanup(); icmpv6_cleanup(); rawv6_exit(); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 399d41f..abedf95 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -954,7 +954,8 @@ ctl_table ipv6_icmp_table_template[] = { .data = &init_net.ipv6.sysctl.icmpv6_time, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec_ms_jiffies, + .strategy = &sysctl_ms_jiffies }, { .ctl_name = 0 }, }; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 34e5a96..ea81c61 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -71,7 +71,8 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt IP6_INC_STATS_BH(idev, IPSTATS_MIB_INRECEIVES); - if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) { + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL || + !idev || unlikely(idev->cnf.disable_ipv6)) { IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDISCARDS); rcu_read_unlock(); goto out; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index fd7cd1b..0981c1e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -173,6 +173,13 @@ static inline int ip6_skb_dst_mtu(struct sk_buff *skb) int ip6_output(struct sk_buff *skb) { + struct inet6_dev *idev = ip6_dst_idev(skb->dst); + if (unlikely(idev->cnf.disable_ipv6)) { + IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); + kfree_skb(skb); + return 0; + } + if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || dst_allfrag(skb->dst)) return ip6_fragment(skb, ip6_output2); @@ -498,7 +505,8 @@ int ip6_forward(struct sk_buff *skb) int addrtype = ipv6_addr_type(&hdr->saddr); /* This check is security critical. */ - if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK)) + if (addrtype == IPV6_ADDR_ANY || + addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) goto error; if (addrtype & IPV6_ADDR_LINKLOCAL) { icmpv6_send(skb, ICMPV6_DEST_UNREACH, diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 90e7630..cfac26d 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -948,23 +948,51 @@ static struct notifier_block ip6_mr_notifier = { * Setup for IP multicast routing */ -void __init ip6_mr_init(void) +int __init ip6_mr_init(void) { + int err; + mrt_cachep = kmem_cache_create("ip6_mrt_cache", sizeof(struct mfc6_cache), 0, SLAB_HWCACHE_ALIGN, NULL); if (!mrt_cachep) - panic("cannot allocate ip6_mrt_cache"); + return -ENOMEM; setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0); - register_netdevice_notifier(&ip6_mr_notifier); + err = register_netdevice_notifier(&ip6_mr_notifier); + if (err) + goto reg_notif_fail; +#ifdef CONFIG_PROC_FS + err = -ENOMEM; + if (!proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops)) + goto proc_vif_fail; + if (!proc_net_fops_create(&init_net, "ip6_mr_cache", + 0, &ip6mr_mfc_fops)) + goto proc_cache_fail; +#endif + return 0; +reg_notif_fail: + kmem_cache_destroy(mrt_cachep); #ifdef CONFIG_PROC_FS - proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops); - proc_net_fops_create(&init_net, "ip6_mr_cache", 0, &ip6mr_mfc_fops); +proc_vif_fail: + unregister_netdevice_notifier(&ip6_mr_notifier); +proc_cache_fail: + proc_net_remove(&init_net, "ip6_mr_vif"); #endif + return err; } +void ip6_mr_cleanup(void) +{ +#ifdef CONFIG_PROC_FS + proc_net_remove(&init_net, "ip6_mr_cache"); + proc_net_remove(&init_net, "ip6_mr_vif"); +#endif + unregister_netdevice_notifier(&ip6_mr_notifier); + del_timer(&ipmr_expire_timer); + kmem_cache_destroy(mrt_cachep); +} static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 751e98f..5d6c166 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -228,7 +228,7 @@ static __inline__ int rt6_check_expired(const struct rt6_info *rt) static inline int rt6_need_strict(struct in6_addr *daddr) { return (ipv6_addr_type(daddr) & - (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); + (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK)); } /* @@ -237,15 +237,20 @@ static inline int rt6_need_strict(struct in6_addr *daddr) static inline struct rt6_info *rt6_device_match(struct net *net, struct rt6_info *rt, + struct in6_addr *saddr, int oif, int flags) { struct rt6_info *local = NULL; struct rt6_info *sprt; - if (oif) { - for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) { - struct net_device *dev = sprt->rt6i_dev; + if (!oif && ipv6_addr_any(saddr)) + goto out; + + for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) { + struct net_device *dev = sprt->rt6i_dev; + + if (oif) { if (dev->ifindex == oif) return sprt; if (dev->flags & IFF_LOOPBACK) { @@ -259,14 +264,21 @@ static inline struct rt6_info *rt6_device_match(struct net *net, } local = sprt; } + } else { + if (ipv6_chk_addr(net, saddr, dev, + flags & RT6_LOOKUP_F_IFACE)) + return sprt; } + } + if (oif) { if (local) return local; if (flags & RT6_LOOKUP_F_IFACE) return net->ipv6.ip6_null_entry; } +out: return rt; } @@ -539,7 +551,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: rt = fn->leaf; - rt = rt6_device_match(net, rt, fl->oif, flags); + rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags); BACKTRACK(net, &fl->fl6_src); out: dst_use(&rt->u.dst, jiffies); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f91e1df..d1477b3 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -166,7 +166,8 @@ try_again: goto out_free; if (!peeked) - UDP6_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite); + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_INDATAGRAMS, is_udplite); sock_recv_timestamp(msg, sk, skb); @@ -213,7 +214,7 @@ out: csum_copy_err: lock_sock(sk); if (!skb_kill_datagram(sk, skb, flags)) - UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); + UDP6_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); release_sock(sk); if (flags & MSG_DONTWAIT) @@ -298,7 +299,8 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) { - UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite); + UDP6_INC_STATS_BH(sock_net(sk), + UDP_MIB_RCVBUFERRORS, is_udplite); atomic_inc(&sk->sk_drops); } goto drop; @@ -306,7 +308,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) return 0; drop: - UDP6_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); + UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); return -1; } @@ -438,7 +440,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], struct net_device *dev = skb->dev; struct in6_addr *saddr, *daddr; u32 ulen = 0; - struct net *net; + struct net *net = dev_net(skb->dev); if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto short_packet; @@ -473,7 +475,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], if (udp6_csum_init(skb, uh, proto)) goto discard; - net = dev_net(skb->dev); /* * Multicast receive code */ @@ -496,7 +497,8 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], if (udp_lib_checksum_complete(skb)) goto discard; - UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); + UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS, + proto == IPPROTO_UDPLITE); icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev); @@ -521,7 +523,7 @@ short_packet: ulen, skb->len); discard: - UDP6_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); + UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); kfree_skb(skb); return 0; } @@ -591,7 +593,8 @@ out: up->len = 0; up->pending = 0; if (!err) - UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_OUTDATAGRAMS, is_udplite); return err; } @@ -873,7 +876,8 @@ out: * seems like overkill. */ if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - UDP6_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite); + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_SNDBUFERRORS, is_udplite); } return err; diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c index f8a699e..f98c802 100644 --- a/net/rxrpc/ar-input.c +++ b/net/rxrpc/ar-input.c @@ -21,6 +21,7 @@ #include <net/af_rxrpc.h> #include <net/ip.h> #include <net/udp.h> +#include <net/net_namespace.h> #include "ar-internal.h" unsigned long rxrpc_ack_timeout = 1; @@ -708,12 +709,12 @@ void rxrpc_data_ready(struct sock *sk, int count) if (skb_checksum_complete(skb)) { rxrpc_free_skb(skb); rxrpc_put_local(local); - UDP_INC_STATS_BH(UDP_MIB_INERRORS, 0); + UDP_INC_STATS_BH(&init_net, UDP_MIB_INERRORS, 0); _leave(" [CSUM failed]"); return; } - UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, 0); + UDP_INC_STATS_BH(&init_net, UDP_MIB_INDATAGRAMS, 0); /* the socket buffer we have is owned by UDP, with UDP's data all over * it, but we really want our own */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 43460a1..df5572c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4223,6 +4223,8 @@ static int sctp_getsockopt_peer_addrs_num_old(struct sock *sk, int len, if (copy_from_user(&id, optval, sizeof(sctp_assoc_t))) return -EFAULT; + printk(KERN_WARNING "SCTP: Use of SCTP_GET_PEER_ADDRS_NUM_OLD " + "socket option deprecated\n"); /* For UDP-style sockets, id specifies the association to query. */ asoc = sctp_id2assoc(sk, id); if (!asoc) @@ -4262,6 +4264,9 @@ static int sctp_getsockopt_peer_addrs_old(struct sock *sk, int len, if (getaddrs.addr_num <= 0) return -EINVAL; + printk(KERN_WARNING "SCTP: Use of SCTP_GET_PEER_ADDRS_OLD " + "socket option deprecated\n"); + /* For UDP-style sockets, id specifies the association to query. */ asoc = sctp_id2assoc(sk, getaddrs.assoc_id); if (!asoc) @@ -4355,6 +4360,9 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, if (copy_from_user(&id, optval, sizeof(sctp_assoc_t))) return -EFAULT; + printk(KERN_WARNING "SCTP: Use of SCTP_GET_LOCAL_ADDRS_NUM_OLD " + "socket option deprecated\n"); + /* * For UDP-style sockets, id specifies the association to query. * If the id field is set to the value '0' then the locally bound @@ -4515,6 +4523,10 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, if (getaddrs.addr_num <= 0 || getaddrs.addr_num >= (INT_MAX / sizeof(union sctp_addr))) return -EINVAL; + + printk(KERN_WARNING "SCTP: Use of SCTP_GET_LOCAL_ADDRS_OLD " + "socket option deprecated\n"); + /* * For UDP-style sockets, id specifies the association to query. * If the id field is set to the value '0' then the locally bound |