From 6cb6a27c45cec9184302c2e350b3593c64bc7f6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Sat, 2 Apr 2011 22:48:47 -0700 Subject: net: Call netdev_features_change() from netdev_update_features() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue FEAT_CHANGE notification when features are changed by netdev_update_features(). This will allow changes made by extra constraints on e.g. MTU change to be properly propagated like changes via ethtool. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/dev.c | 23 +++++++++++++++++------ net/core/ethtool.c | 6 +++--- 2 files changed, 20 insertions(+), 9 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 3da9fb0..02f5637 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5236,7 +5236,7 @@ u32 netdev_fix_features(struct net_device *dev, u32 features) } EXPORT_SYMBOL(netdev_fix_features); -void netdev_update_features(struct net_device *dev) +int __netdev_update_features(struct net_device *dev) { u32 features; int err = 0; @@ -5250,7 +5250,7 @@ void netdev_update_features(struct net_device *dev) features = netdev_fix_features(dev, features); if (dev->features == features) - return; + return 0; netdev_info(dev, "Features changed: 0x%08x -> 0x%08x\n", dev->features, features); @@ -5258,12 +5258,23 @@ void netdev_update_features(struct net_device *dev) if (dev->netdev_ops->ndo_set_features) err = dev->netdev_ops->ndo_set_features(dev, features); - if (!err) - dev->features = features; - else if (err < 0) + if (unlikely(err < 0)) { netdev_err(dev, "set_features() failed (%d); wanted 0x%08x, left 0x%08x\n", err, features, dev->features); + return -1; + } + + if (!err) + dev->features = features; + + return 1; +} + +void netdev_update_features(struct net_device *dev) +{ + if (__netdev_update_features(dev)) + netdev_features_change(dev); } EXPORT_SYMBOL(netdev_update_features); @@ -5430,7 +5441,7 @@ int register_netdevice(struct net_device *dev) goto err_uninit; dev->reg_state = NETREG_REGISTERED; - netdev_update_features(dev); + __netdev_update_features(dev); /* * Default initial state at registry is that the diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 74ead9e..439e4b0 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -317,7 +317,7 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr) dev->wanted_features &= ~features[0].valid; dev->wanted_features |= features[0].valid & features[0].requested; - netdev_update_features(dev); + __netdev_update_features(dev); if ((dev->wanted_features ^ dev->features) & features[0].valid) ret |= ETHTOOL_F_WISH; @@ -499,7 +499,7 @@ static int ethtool_set_one_feature(struct net_device *dev, else dev->wanted_features &= ~mask; - netdev_update_features(dev); + __netdev_update_features(dev); return 0; } @@ -551,7 +551,7 @@ int __ethtool_set_flags(struct net_device *dev, u32 data) dev->wanted_features = (dev->wanted_features & ~changed) | data; - netdev_update_features(dev); + __netdev_update_features(dev); return 0; } -- cgit v1.1 From c6e1a0d12ca7b4f22c58e55a16beacfb7d3d8462 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 4 Apr 2011 22:30:30 -0700 Subject: net: Allow no-cache copy from user on transmit This patch uses __copy_from_user_nocache on transmit to bypass data cache for a performance improvement. skb_add_data_nocache and skb_copy_to_page_nocache can be called by sendmsg functions to use this feature, initial support is in tcp_sendmsg. This functionality is configurable per device using ethtool. Presumably, this feature would only be useful when the driver does not touch the data. The feature is turned on by default if a device indicates that it does some form of checksum offload; it is off by default for devices that do no checksum offload or indicate no checksum is necessary. For the former case copy-checksum is probably done anyway, in the latter case the device is likely loopback in which case the no cache copy is probably not beneficial. This patch was tested using 200 instances of netperf TCP_RR with 1400 byte request and one byte reply. Platform is 16 core AMD x86. No-cache copy disabled: 672703 tps, 97.13% utilization 50/90/99% latency:244.31 484.205 1028.41 No-cache copy enabled: 702113 tps, 96.16% utilization, 50/90/99% latency 238.56 467.56 956.955 Using 14000 byte request and response sizes demonstrate the effects more dramatically: No-cache copy disabled: 79571 tps, 34.34 %utlization 50/90/95% latency 1584.46 2319.59 5001.76 No-cache copy enabled: 83856 tps, 34.81% utilization 50/90/95% latency 2508.42 2622.62 2735.88 Note especially the effect on latency tail (95th percentile). This seems to provide a nice performance improvement and is consistent in the tests I ran. Presumably, this would provide the greatest benfits in the presence of an application workload stressing the cache and a lot of transmit data happening. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/dev.c | 12 ++++++++++++ net/core/ethtool.c | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 02f5637..5d0b4f6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5425,6 +5425,14 @@ int register_netdevice(struct net_device *dev) dev->features &= ~NETIF_F_GSO; } + /* Turn on no cache copy if HW is doing checksum */ + dev->hw_features |= NETIF_F_NOCACHE_COPY; + if ((dev->features & NETIF_F_ALL_CSUM) && + !(dev->features & NETIF_F_NO_CSUM)) { + dev->wanted_features |= NETIF_F_NOCACHE_COPY; + dev->features |= NETIF_F_NOCACHE_COPY; + } + /* Enable GRO and NETIF_F_HIGHDMA for vlans by default, * vlan_dev_init() will do the dev->features check, so these features * are enabled only if supported by underlying device. @@ -6182,6 +6190,10 @@ u32 netdev_increment_features(u32 all, u32 one, u32 mask) } } + /* If device can't no cache copy, don't do for all */ + if (!(one & NETIF_F_NOCACHE_COPY)) + all &= ~NETIF_F_NOCACHE_COPY; + one |= NETIF_F_ALL_CSUM; one |= all & NETIF_F_ONE_FOR_ALL; diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 439e4b0..719670a 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -359,7 +359,7 @@ static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GS /* NETIF_F_NTUPLE */ "rx-ntuple-filter", /* NETIF_F_RXHASH */ "rx-hashing", /* NETIF_F_RXCSUM */ "rx-checksum", - "", + /* NETIF_F_NOCACHE_COPY */ "tx-nocache-copy" "", }; -- cgit v1.1 From 68f512f21a64c9b264df6c61a9333e7890faf74b Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 2 Apr 2011 00:35:15 +0100 Subject: ethtool: Change ETHTOOL_PHYS_ID implementation to allow dropping RTNL The ethtool ETHTOOL_PHYS_ID command runs for an arbitrarily long period of time, holding the RTNL lock. This blocks routing updates, device enumeration, and various important operations that one might want to keep running while hunting for the flashing LED. We need to drop the RTNL lock during this operation, but currently the core implementation is a thin wrapper around a driver operation and drivers may well depend upon holding the lock. Define a new driver operation 'set_phys_id' with an argument that sets the ID indicator on/off/inactive/active (the last optional, for any driver or firmware that prefers to handle blinking asynchronously). When this is defined, the ethtool core drops the lock while waiting and only acquires it around calls to this operation. Deprecate the 'phys_id' operation in favour of this. It can be removed once all in-tree drivers are converted. Signed-off-by: Ben Hutchings --- net/core/ethtool.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 74ead9e..1c95f0f 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include /* * Some useful ethtool_ops methods that're device independent. @@ -1618,14 +1620,63 @@ out: static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) { struct ethtool_value id; + static bool busy; + int rc; - if (!dev->ethtool_ops->phys_id) + if (!dev->ethtool_ops->set_phys_id && !dev->ethtool_ops->phys_id) return -EOPNOTSUPP; + if (busy) + return -EBUSY; + if (copy_from_user(&id, useraddr, sizeof(id))) return -EFAULT; - return dev->ethtool_ops->phys_id(dev, id.data); + if (!dev->ethtool_ops->set_phys_id) + /* Do it the old way */ + return dev->ethtool_ops->phys_id(dev, id.data); + + rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE); + if (rc && rc != -EINVAL) + return rc; + + /* Drop the RTNL lock while waiting, but prevent reentry or + * removal of the device. + */ + busy = true; + dev_hold(dev); + rtnl_unlock(); + + if (rc == 0) { + /* Driver will handle this itself */ + schedule_timeout_interruptible( + id.data ? id.data : MAX_SCHEDULE_TIMEOUT); + } else { + /* Driver expects to be called periodically */ + do { + rtnl_lock(); + rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ON); + rtnl_unlock(); + if (rc) + break; + schedule_timeout_interruptible(HZ / 2); + + rtnl_lock(); + rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_OFF); + rtnl_unlock(); + if (rc) + break; + schedule_timeout_interruptible(HZ / 2); + } while (!signal_pending(current) && + (id.data == 0 || --id.data != 0)); + } + + rtnl_lock(); + dev_put(dev); + busy = false; + + (void)dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_INACTIVE); + return rc; } static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) -- cgit v1.1 From 5d9f11cf5038587cc53975deb8beaa1a876a7a7b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 8 Apr 2011 12:07:22 +0000 Subject: ethtool: prevent null pointer dereference with NTUPLE set but no set_rx_ntuple This change is meant to prevent a possible null pointer dereference if NETIF_F_NTUPLE is defined but the set_rx_ntuple function pointer is not. The main motivation behind this patch is to eventually replace the ntuple interfaces entirely with the network flow classifier interfaces. This allows the device drivers to maintain the ntuple check internally while using the network flow classifier interface for setting up and displaying rules. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- net/core/ethtool.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/core') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 1b7fa98..704e176 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -910,6 +910,9 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, struct ethtool_rx_ntuple_flow_spec_container *fsc = NULL; int ret; + if (!ops->set_rx_ntuple) + return -EOPNOTSUPP; + if (!(dev->features & NETIF_F_NTUPLE)) return -EINVAL; -- cgit v1.1 From 143780c6562080c1117cd9197ee1b33c0d838376 Mon Sep 17 00:00:00 2001 From: "Allan, Bruce W" Date: Mon, 11 Apr 2011 13:01:59 +0000 Subject: ethtool: time to blink provided in seconds not jiffies When blinking for a duration set by the user, the value specified is in seconds but it is used as the number of jiffies in the timeout after which the Physical ID indicator is deactivated. Fix by converting the timeout to seconds. Signed-off-by: Bruce Allan Acked-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 704e176..43ef09f 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1653,7 +1653,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) if (rc == 0) { /* Driver will handle this itself */ schedule_timeout_interruptible( - id.data ? id.data : MAX_SCHEDULE_TIMEOUT); + id.data ? (id.data * HZ) : MAX_SCHEDULE_TIMEOUT); } else { /* Driver expects to be called periodically */ do { -- cgit v1.1 From bcc6d47903612c3861201cc3a866fb604f26b8b2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 7 Apr 2011 19:48:33 +0000 Subject: net: vlan: make non-hw-accel rx path similar to hw-accel Now there are 2 paths for rx vlan frames. When rx-vlan-hw-accel is enabled, skb is untagged by NIC, vlan_tci is set and the skb gets into vlan code in __netif_receive_skb - vlan_hwaccel_do_receive. For non-rx-vlan-hw-accel however, tagged skb goes thru whole __netif_receive_skb, it's untagged in ptype_base hander and reinjected This incosistency is fixed by this patch. Vlan untagging happens early in __netif_receive_skb so the rest of code (ptype_all handlers, rx_handlers) see the skb like it was untagged by hw. Signed-off-by: Jiri Pirko v1->v2: remove "inline" from vlan_core.c functions Signed-off-by: David S. Miller --- net/core/dev.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 95897ff..d1aebf7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3130,6 +3130,12 @@ another_round: __this_cpu_inc(softnet_data.processed); + if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { + skb = vlan_untag(skb); + if (unlikely(!skb)) + goto out; + } + #ifdef CONFIG_NET_CLS_ACT if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); @@ -3177,7 +3183,7 @@ ncls: ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; } - if (vlan_hwaccel_do_receive(&skb)) { + if (vlan_do_receive(&skb)) { ret = __netif_receive_skb(skb); goto out; } else if (unlikely(!skb)) -- cgit v1.1 From 872674858fe236b746317741013c830bb70775c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 12 Apr 2011 09:56:38 +0000 Subject: net: add RTNL_ASSERT in __netdev_update_features() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/dev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index d1aebf7..f523eee 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5247,6 +5247,8 @@ int __netdev_update_features(struct net_device *dev) u32 features; int err = 0; + ASSERT_RTNL(); + features = netdev_get_wanted_features(dev); if (dev->netdev_ops->ndo_fix_features) -- cgit v1.1 From 8b5933c380fc66a6311739f9b36a812383f82141 Mon Sep 17 00:00:00 2001 From: amit salecha Date: Thu, 7 Apr 2011 01:58:42 +0000 Subject: net: ethtool support to configure number of channels Ethtool support to configure RX, TX and other channels. combined field in struct ethtool_channels to reflect set of channel (RX, TX or other). Other channel can be link interrupts, SR-IOV coordination etc. ETHTOOL_GCHANNELS will report max and current number of RX channels, max and current number of TX channels, max and current number of other channel or max and current number of combined channel. Number of channel can be modify upto max number of channel through ETHTOOL_SCHANNELS command. Ben Hutchings: o define 'combined' and 'other' types. Most multiqueue drivers pair up RX and TX queues so that most channels combine RX and TX work. o Please could you use a kernel-doc comment to describe the structure. Signed-off-by: Amit Kumar Salecha Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/ethtool.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'net/core') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 43ef09f..41dee2d 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1446,6 +1446,35 @@ static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) return dev->ethtool_ops->set_ringparam(dev, &ringparam); } +static noinline_for_stack int ethtool_get_channels(struct net_device *dev, + void __user *useraddr) +{ + struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS }; + + if (!dev->ethtool_ops->get_channels) + return -EOPNOTSUPP; + + dev->ethtool_ops->get_channels(dev, &channels); + + if (copy_to_user(useraddr, &channels, sizeof(channels))) + return -EFAULT; + return 0; +} + +static noinline_for_stack int ethtool_set_channels(struct net_device *dev, + void __user *useraddr) +{ + struct ethtool_channels channels; + + if (!dev->ethtool_ops->set_channels) + return -EOPNOTSUPP; + + if (copy_from_user(&channels, useraddr, sizeof(channels))) + return -EFAULT; + + return dev->ethtool_ops->set_channels(dev, &channels); +} + static int ethtool_get_pauseparam(struct net_device *dev, void __user *useraddr) { struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM }; @@ -2007,6 +2036,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SGRO: rc = ethtool_set_one_feature(dev, useraddr, ethcmd); break; + case ETHTOOL_GCHANNELS: + rc = ethtool_get_channels(dev, useraddr); + break; + case ETHTOOL_SCHANNELS: + rc = ethtool_set_channels(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } -- cgit v1.1 From fce55922f5299a04c0a56b170a141fab34f13465 Mon Sep 17 00:00:00 2001 From: "Allan, Bruce W" Date: Wed, 13 Apr 2011 13:09:10 +0000 Subject: ethtool: allow custom interval for physical identification When physical identification of an adapter is done by toggling the mechanism on and off through software utilizing the set_phys_id operation, it is done with a fixed duration for both on and off states. Some drivers may want to set a custom duration for the on/off intervals. This patch changes the API so the return code from the driver's entry point when it is called with ETHTOOL_ID_ACTIVE can specify the frequency at which to cycle the on/off states, and updates the drivers that have already been converted to use the new set_phys_id and use the synchronous method for identifying an adapter. The physical identification frequency set in the updated drivers is based on how it was done prior to the introduction of set_phys_id. Compile tested only. Also fixes a compiler warning in sfc. v2: drivers do not return -EINVAL for ETHOOL_ID_ACTIVE v3: fold patchset into single patch and cleanup per Ben's feedback Signed-off-by: Bruce Allan Cc: Ben Hutchings Cc: Sathya Perla Cc: Subbu Seetharaman Cc: Ajit Khaparde Cc: Michael Chan Cc: Eilon Greenstein Cc: Divy Le Ray Cc: Don Fry Cc: Jon Mason Cc: Solarflare linux maintainers Cc: Steve Hodgson Cc: Stephen Hemminger Cc: Matt Carlson Acked-by: Jon Mason Acked-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/ethtool.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'net/core') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 41dee2d..13d79f5 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1669,7 +1669,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) return dev->ethtool_ops->phys_id(dev, id.data); rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE); - if (rc && rc != -EINVAL) + if (rc < 0) return rc; /* Drop the RTNL lock while waiting, but prevent reentry or @@ -1684,21 +1684,22 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) schedule_timeout_interruptible( id.data ? (id.data * HZ) : MAX_SCHEDULE_TIMEOUT); } else { - /* Driver expects to be called periodically */ + /* Driver expects to be called at twice the frequency in rc */ + int n = rc * 2, i, interval = HZ / n; + + /* Count down seconds */ do { - rtnl_lock(); - rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ON); - rtnl_unlock(); - if (rc) - break; - schedule_timeout_interruptible(HZ / 2); - - rtnl_lock(); - rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_OFF); - rtnl_unlock(); - if (rc) - break; - schedule_timeout_interruptible(HZ / 2); + /* Count down iterations per second */ + i = n; + do { + rtnl_lock(); + rc = dev->ethtool_ops->set_phys_id(dev, + (i & 1) ? ETHTOOL_ID_OFF : ETHTOOL_ID_ON); + rtnl_unlock(); + if (rc) + break; + schedule_timeout_interruptible(interval); + } while (!signal_pending(current) && --i != 0); } while (!signal_pending(current) && (id.data == 0 || --id.data != 0)); } -- cgit v1.1 From 911cb193f3eb0370f20fbba712211e55ffede4de Mon Sep 17 00:00:00 2001 From: Rob Landley Date: Fri, 15 Apr 2011 02:26:25 +0000 Subject: net: minor cleanup to net_namespace.c. Inline a small static function that's only ever called from one place. Signed-off-by: Rob Landley Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/net_namespace.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'net/core') diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 3f86026..1abb508 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -216,11 +216,14 @@ static void net_free(struct net *net) kmem_cache_free(net_cachep, net); } -static struct net *net_create(void) +struct net *copy_net_ns(unsigned long flags, struct net *old_net) { struct net *net; int rv; + if (!(flags & CLONE_NEWNET)) + return get_net(old_net); + net = net_alloc(); if (!net) return ERR_PTR(-ENOMEM); @@ -239,13 +242,6 @@ static struct net *net_create(void) return net; } -struct net *copy_net_ns(unsigned long flags, struct net *old_net) -{ - if (!(flags & CLONE_NEWNET)) - return get_net(old_net); - return net_create(); -} - static DEFINE_SPINLOCK(cleanup_list_lock); static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ -- cgit v1.1 From 21f825e61878db94c7093c8407602fc89fc38ad9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 17 Apr 2011 00:13:16 -0700 Subject: pktgen: Fix set-but-unused variable. "iph" in pktgen_output_ipsec() is set but never actually used. Kill it off. Signed-off-by: David S. Miller --- net/core/pktgen.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/core') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index aeeece7..2fa6fee 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2514,7 +2514,6 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev) { struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x; int err = 0; - struct iphdr *iph; if (!x) return 0; @@ -2524,7 +2523,6 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev) return 0; spin_lock(&x->lock); - iph = ip_hdr(skb); err = x->outer_mode->output(x, skb); if (err) -- cgit v1.1 From 5f8629c526b4f7e529a6d27bbd802c0dc7fcc357 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Thu, 21 Apr 2011 13:59:21 +0000 Subject: net: fix hw_features ethtool_ops->set_flags compatibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit __ethtool_set_flags() was not taking into account features set but not user-toggleable. Since GFLAGS returns masked dev->features, EINVAL is returned when passed flags differ to it, and not to wanted_features. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 13d79f5..d8b1a8d 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -546,12 +546,12 @@ int __ethtool_set_flags(struct net_device *dev, u32 data) } /* allow changing only bits set in hw_features */ - changed = (data ^ dev->wanted_features) & flags_dup_features; + changed = (data ^ dev->features) & flags_dup_features; if (changed & ~dev->hw_features) return (changed & dev->hw_features) ? -EINVAL : -EOPNOTSUPP; dev->wanted_features = - (dev->wanted_features & ~changed) | data; + (dev->wanted_features & ~changed) | (data & dev->hw_features); __netdev_update_features(dev); -- cgit v1.1 From b71d1d426d263b0b6cb5760322efebbfc89d4463 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Apr 2011 04:53:02 +0000 Subject: inet: constify ip headers and in6_addr Add const qualifiers to structs iphdr, ipv6hdr and in6_addr pointers where possible, to make code intention more obvious. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++---- net/core/netpoll.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 3871bf6..379c993 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2502,8 +2502,8 @@ static inline void ____napi_schedule(struct softnet_data *sd, __u32 __skb_get_rxhash(struct sk_buff *skb) { int nhoff, hash = 0, poff; - struct ipv6hdr *ip6; - struct iphdr *ip; + const struct ipv6hdr *ip6; + const struct iphdr *ip; u8 ip_proto; u32 addr1, addr2, ihl; union { @@ -2518,7 +2518,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(*ip) + nhoff)) goto done; - ip = (struct iphdr *) (skb->data + nhoff); + ip = (const struct iphdr *) (skb->data + nhoff); if (ip->frag_off & htons(IP_MF | IP_OFFSET)) ip_proto = 0; else @@ -2531,7 +2531,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff)) goto done; - ip6 = (struct ipv6hdr *) (skb->data + nhoff); + ip6 = (const struct ipv6hdr *) (skb->data + nhoff); ip_proto = ip6->nexthdr; addr1 = (__force u32) ip6->saddr.s6_addr32[3]; addr2 = (__force u32) ip6->daddr.s6_addr32[3]; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 06be243..46d9c3a 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -539,7 +539,7 @@ int __netpoll_rx(struct sk_buff *skb) { int proto, len, ulen; int hits = 0; - struct iphdr *iph; + const struct iphdr *iph; struct udphdr *uh; struct netpoll_info *npinfo = skb->dev->npinfo; struct netpoll *np, *tmp; -- cgit v1.1 From 22d5969fb450afd3a4aff606360f7d52c5a3a628 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Thu, 21 Apr 2011 12:42:15 +0000 Subject: net: make WARN_ON in dev_disable_lro() useful MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mirosław Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 379c993..541f22a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1315,7 +1315,8 @@ void dev_disable_lro(struct net_device *dev) return; __ethtool_set_flags(dev, flags & ~ETH_FLAG_LRO); - WARN_ON(dev->features & NETIF_F_LRO); + if (unlikely(dev->features & NETIF_F_LRO)) + netdev_WARN(dev, "failed to disable LRO!\n"); } EXPORT_SYMBOL(dev_disable_lro); -- cgit v1.1 From 3aba891dde3842d89ad022237b99c1ed308040b0 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 19 Apr 2011 03:48:16 +0000 Subject: bonding: move processing of recv handlers into handle_frame() Since now when bonding uses rx_handler, all traffic going into bond device goes thru bond_handle_frame. So there's no need to go back into bonding code later via ptype handlers. This patch converts original ptype handlers into "bonding receive probes". These functions are called from bond_handle_frame and they are registered per-mode. Note that vlan packets are also handled because they are always untagged thanks to vlan_untag() Note that this also allows arpmon for eth-bond-bridge-vlan topology. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev.c | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 541f22a..3bbb4c2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3077,25 +3077,6 @@ void netdev_rx_handler_unregister(struct net_device *dev) } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); -static void vlan_on_bond_hook(struct sk_buff *skb) -{ - /* - * Make sure ARP frames received on VLAN interfaces stacked on - * bonding interfaces still make their way to any base bonding - * device that may have registered for a specific ptype. - */ - if (skb->dev->priv_flags & IFF_802_1Q_VLAN && - vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING && - skb->protocol == htons(ETH_P_ARP)) { - struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); - - if (!skb2) - return; - skb2->dev = vlan_dev_real_dev(skb->dev); - netif_rx(skb2); - } -} - static int __netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; @@ -3191,8 +3172,6 @@ ncls: goto out; } - vlan_on_bond_hook(skb); - /* deliver only exact match when indicated */ null_or_dev = deliver_exact ? skb->dev : NULL; -- cgit v1.1 From 0a14842f5a3c0e88a1e59fac5c3025db39721f74 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 Apr 2011 09:27:32 +0000 Subject: net: filter: Just In Time compiler for x86-64 In order to speedup packet filtering, here is an implementation of a JIT compiler for x86_64 It is disabled by default, and must be enabled by the admin. echo 1 >/proc/sys/net/core/bpf_jit_enable It uses module_alloc() and module_free() to get memory in the 2GB text kernel range since we call helpers functions from the generated code. EAX : BPF A accumulator EBX : BPF X accumulator RDI : pointer to skb (first argument given to JIT function) RBP : frame pointer (even if CONFIG_FRAME_POINTER=n) r9d : skb->len - skb->data_len (headlen) r8 : skb->data To get a trace of generated code, use : echo 2 >/proc/sys/net/core/bpf_jit_enable Example of generated code : # tcpdump -p -n -s 0 -i eth1 host 192.168.20.0/24 flen=18 proglen=147 pass=3 image=ffffffffa00b5000 JIT code: ffffffffa00b5000: 55 48 89 e5 48 83 ec 60 48 89 5d f8 44 8b 4f 60 JIT code: ffffffffa00b5010: 44 2b 4f 64 4c 8b 87 b8 00 00 00 be 0c 00 00 00 JIT code: ffffffffa00b5020: e8 24 7b f7 e0 3d 00 08 00 00 75 28 be 1a 00 00 JIT code: ffffffffa00b5030: 00 e8 fe 7a f7 e0 24 00 3d 00 14 a8 c0 74 49 be JIT code: ffffffffa00b5040: 1e 00 00 00 e8 eb 7a f7 e0 24 00 3d 00 14 a8 c0 JIT code: ffffffffa00b5050: 74 36 eb 3b 3d 06 08 00 00 74 07 3d 35 80 00 00 JIT code: ffffffffa00b5060: 75 2d be 1c 00 00 00 e8 c8 7a f7 e0 24 00 3d 00 JIT code: ffffffffa00b5070: 14 a8 c0 74 13 be 26 00 00 00 e8 b5 7a f7 e0 24 JIT code: ffffffffa00b5080: 00 3d 00 14 a8 c0 75 07 b8 ff ff 00 00 eb 02 31 JIT code: ffffffffa00b5090: c0 c9 c3 BPF program is 144 bytes long, so native program is almost same size ;) (000) ldh [12] (001) jeq #0x800 jt 2 jf 8 (002) ld [26] (003) and #0xffffff00 (004) jeq #0xc0a81400 jt 16 jf 5 (005) ld [30] (006) and #0xffffff00 (007) jeq #0xc0a81400 jt 16 jf 17 (008) jeq #0x806 jt 10 jf 9 (009) jeq #0x8035 jt 10 jf 17 (010) ld [28] (011) and #0xffffff00 (012) jeq #0xc0a81400 jt 16 jf 13 (013) ld [38] (014) and #0xffffff00 (015) jeq #0xc0a81400 jt 16 jf 17 (016) ret #65535 (017) ret #0 Signed-off-by: Eric Dumazet Cc: Arnaldo Carvalho de Melo Cc: Ben Hutchings Cc: Hagen Paul Pfeifer Signed-off-by: David S. Miller --- net/core/filter.c | 65 ++++------------------------------------------ net/core/sysctl_net_core.c | 9 +++++++ 2 files changed, 14 insertions(+), 60 deletions(-) (limited to 'net/core') diff --git a/net/core/filter.c b/net/core/filter.c index afb8afb..0eb8c44 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -39,65 +39,6 @@ #include #include -enum { - BPF_S_RET_K = 1, - BPF_S_RET_A, - BPF_S_ALU_ADD_K, - BPF_S_ALU_ADD_X, - BPF_S_ALU_SUB_K, - BPF_S_ALU_SUB_X, - BPF_S_ALU_MUL_K, - BPF_S_ALU_MUL_X, - BPF_S_ALU_DIV_X, - BPF_S_ALU_AND_K, - BPF_S_ALU_AND_X, - BPF_S_ALU_OR_K, - BPF_S_ALU_OR_X, - BPF_S_ALU_LSH_K, - BPF_S_ALU_LSH_X, - BPF_S_ALU_RSH_K, - BPF_S_ALU_RSH_X, - BPF_S_ALU_NEG, - BPF_S_LD_W_ABS, - BPF_S_LD_H_ABS, - BPF_S_LD_B_ABS, - BPF_S_LD_W_LEN, - BPF_S_LD_W_IND, - BPF_S_LD_H_IND, - BPF_S_LD_B_IND, - BPF_S_LD_IMM, - BPF_S_LDX_W_LEN, - BPF_S_LDX_B_MSH, - BPF_S_LDX_IMM, - BPF_S_MISC_TAX, - BPF_S_MISC_TXA, - BPF_S_ALU_DIV_K, - BPF_S_LD_MEM, - BPF_S_LDX_MEM, - BPF_S_ST, - BPF_S_STX, - BPF_S_JMP_JA, - BPF_S_JMP_JEQ_K, - BPF_S_JMP_JEQ_X, - BPF_S_JMP_JGE_K, - BPF_S_JMP_JGE_X, - BPF_S_JMP_JGT_K, - BPF_S_JMP_JGT_X, - BPF_S_JMP_JSET_K, - BPF_S_JMP_JSET_X, - /* Ancillary data */ - BPF_S_ANC_PROTOCOL, - BPF_S_ANC_PKTTYPE, - BPF_S_ANC_IFINDEX, - BPF_S_ANC_NLATTR, - BPF_S_ANC_NLATTR_NEST, - BPF_S_ANC_MARK, - BPF_S_ANC_QUEUE, - BPF_S_ANC_HATYPE, - BPF_S_ANC_RXHASH, - BPF_S_ANC_CPU, -}; - /* No hurry in this branch */ static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size) { @@ -145,7 +86,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); if (filter) { - unsigned int pkt_len = sk_run_filter(skb, filter->insns); + unsigned int pkt_len = SK_RUN_FILTER(filter, skb); err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; } @@ -638,6 +579,7 @@ void sk_filter_release_rcu(struct rcu_head *rcu) { struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); + bpf_jit_free(fp); kfree(fp); } EXPORT_SYMBOL(sk_filter_release_rcu); @@ -672,6 +614,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) atomic_set(&fp->refcnt, 1); fp->len = fprog->len; + fp->bpf_func = sk_run_filter; err = sk_chk_filter(fp->insns, fp->len); if (err) { @@ -679,6 +622,8 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) return err; } + bpf_jit_compile(fp); + old_fp = rcu_dereference_protected(sk->sk_filter, sock_owned_by_user(sk)); rcu_assign_pointer(sk->sk_filter, fp); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 385b609..a829e3f 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -122,6 +122,15 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, +#ifdef CONFIG_BPF_JIT + { + .procname = "bpf_jit_enable", + .data = &bpf_jit_enable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, +#endif { .procname = "netdev_tstamp_prequeue", .data = &netdev_tstamp_prequeue, -- cgit v1.1 From 1742f183fc218798dab6fcf0ded25b6608fc0a48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Fri, 22 Apr 2011 06:31:16 +0000 Subject: net: fix netdev_increment_features() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplify and fix netdev_increment_features() to conform to what is stated in netdevice.h comments about NETIF_F_ONE_FOR_ALL. Include FCoE segmentation and VLAN-challedged flags in computation. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/dev.c | 35 +++++++++++------------------------ 1 file changed, 11 insertions(+), 24 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 3bbb4c2..7db99b5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6164,33 +6164,20 @@ static int dev_cpu_callback(struct notifier_block *nfb, */ u32 netdev_increment_features(u32 all, u32 one, u32 mask) { - /* If device needs checksumming, downgrade to it. */ - if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) - all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM); - else if (mask & NETIF_F_ALL_CSUM) { - /* If one device supports v4/v6 checksumming, set for all. */ - if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) && - !(all & NETIF_F_GEN_CSUM)) { - all &= ~NETIF_F_ALL_CSUM; - all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); - } - - /* If one device supports hw checksumming, set for all. */ - if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) { - all &= ~NETIF_F_ALL_CSUM; - all |= NETIF_F_HW_CSUM; - } - } + if (mask & NETIF_F_GEN_CSUM) + mask |= NETIF_F_ALL_CSUM; + mask |= NETIF_F_VLAN_CHALLENGED; - /* If device can't no cache copy, don't do for all */ - if (!(one & NETIF_F_NOCACHE_COPY)) - all &= ~NETIF_F_NOCACHE_COPY; + all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask; + all &= one | ~NETIF_F_ALL_FOR_ALL; - one |= NETIF_F_ALL_CSUM; + /* If device needs checksumming, downgrade to it. */ + if (all & (NETIF_F_ALL_CSUM & ~NETIF_F_NO_CSUM)) + all &= ~NETIF_F_NO_CSUM; - one |= all & NETIF_F_ONE_FOR_ALL; - all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO; - all |= one & mask & NETIF_F_ONE_FOR_ALL; + /* If one device supports hw checksumming, set for all. */ + if (all & NETIF_F_GEN_CSUM) + all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM); return all; } -- cgit v1.1 From 5c1e6aa300a7a669dc469d2dcb20172c6bd8fed9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 28 Apr 2011 14:13:38 -0700 Subject: net: Make dst_alloc() take more explicit initializations. Now the dst->dev, dev->obsolete, and dst->flags values can be specified as well. Signed-off-by: David S. Miller --- net/core/dst.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'net/core') diff --git a/net/core/dst.c b/net/core/dst.c index 91104d3..9505778 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -166,7 +166,8 @@ EXPORT_SYMBOL(dst_discard); const u32 dst_default_metrics[RTAX_MAX]; -void *dst_alloc(struct dst_ops *ops, int initial_ref) +void *dst_alloc(struct dst_ops *ops, struct net_device *dev, + int initial_ref, int initial_obsolete, int flags) { struct dst_entry *dst; @@ -177,12 +178,19 @@ void *dst_alloc(struct dst_ops *ops, int initial_ref) dst = kmem_cache_zalloc(ops->kmem_cachep, GFP_ATOMIC); if (!dst) return NULL; - atomic_set(&dst->__refcnt, initial_ref); dst->ops = ops; - dst->lastuse = jiffies; - dst->path = dst; - dst->input = dst->output = dst_discard; + dst->dev = dev; + if (dev) + dev_hold(dev); dst_init_metrics(dst, dst_default_metrics, true); + dst->path = dst; + dst->input = dst_discard; + dst->output = dst_discard; + + dst->obsolete = initial_obsolete; + atomic_set(&dst->__refcnt, initial_ref); + dst->lastuse = jiffies; + dst->flags = flags; #if RT_CACHE_DEBUG >= 2 atomic_inc(&dst_total); #endif -- cgit v1.1 From cf91166223772ef4a2ed98b9874958bf6a2470df Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 28 Apr 2011 14:31:47 -0700 Subject: net: Use non-zero allocations in dst_alloc(). Make dst_alloc() and it's users explicitly initialize the entire entry. The zero'ing done by kmem_cache_zalloc() was almost entirely redundant. Signed-off-by: David S. Miller --- net/core/dst.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/net/core/dst.c b/net/core/dst.c index 9505778..30f0093 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -175,22 +175,36 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, if (ops->gc(ops)) return NULL; } - dst = kmem_cache_zalloc(ops->kmem_cachep, GFP_ATOMIC); + dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC); if (!dst) return NULL; - dst->ops = ops; + dst->child = NULL; dst->dev = dev; if (dev) dev_hold(dev); + dst->ops = ops; dst_init_metrics(dst, dst_default_metrics, true); + dst->expires = 0UL; dst->path = dst; + dst->neighbour = NULL; + dst->hh = NULL; +#ifdef CONFIG_XFRM + dst->xfrm = NULL; +#endif dst->input = dst_discard; dst->output = dst_discard; - + dst->error = 0; dst->obsolete = initial_obsolete; + dst->header_len = 0; + dst->trailer_len = 0; +#ifdef CONFIG_IP_ROUTE_CLASSID + dst->tclassid = 0; +#endif atomic_set(&dst->__refcnt, initial_ref); + dst->__use = 0; dst->lastuse = jiffies; dst->flags = flags; + dst->next = NULL; #if RT_CACHE_DEBUG >= 2 atomic_inc(&dst_total); #endif -- cgit v1.1 From 7d36a991e8d36b8ae87e2aa1158d3735e656253b Mon Sep 17 00:00:00 2001 From: amit salecha Date: Fri, 22 Apr 2011 16:22:20 +0000 Subject: pktgen: create num frags requested Pktgen doesn't generate number of frags requested. Divide packet size by number of frags and fill that in every frags. Example: With packet size 1470, it generate only 11 frags. Initial frags get lenght 706, 353, 177....so on. Last frag get divided by 2. Now with this fix, each frags will get 78 bytes. Signed-off-by: Amit Kumar Salecha Signed-off-by: David S. Miller --- net/core/pktgen.c | 35 +++++++++-------------------------- 1 file changed, 9 insertions(+), 26 deletions(-) (limited to 'net/core') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 2fa6fee..ff79d94 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2622,6 +2622,7 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, } else { int frags = pkt_dev->nfrags; int i, len; + int frag_len; if (frags > MAX_SKB_FRAGS) @@ -2633,6 +2634,8 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, } i = 0; + frag_len = (datalen/frags) < PAGE_SIZE ? + (datalen/frags) : PAGE_SIZE; while (datalen > 0) { if (unlikely(!pkt_dev->page)) { int node = numa_node_id(); @@ -2646,38 +2649,18 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, skb_shinfo(skb)->frags[i].page = pkt_dev->page; get_page(pkt_dev->page); skb_shinfo(skb)->frags[i].page_offset = 0; - skb_shinfo(skb)->frags[i].size = - (datalen < PAGE_SIZE ? datalen : PAGE_SIZE); + /*last fragment, fill rest of data*/ + if (i == (frags - 1)) + skb_shinfo(skb)->frags[i].size = + (datalen < PAGE_SIZE ? datalen : PAGE_SIZE); + else + skb_shinfo(skb)->frags[i].size = frag_len; datalen -= skb_shinfo(skb)->frags[i].size; skb->len += skb_shinfo(skb)->frags[i].size; skb->data_len += skb_shinfo(skb)->frags[i].size; i++; skb_shinfo(skb)->nr_frags = i; } - - while (i < frags) { - int rem; - - if (i == 0) - break; - - rem = skb_shinfo(skb)->frags[i - 1].size / 2; - if (rem == 0) - break; - - skb_shinfo(skb)->frags[i - 1].size -= rem; - - skb_shinfo(skb)->frags[i] = - skb_shinfo(skb)->frags[i - 1]; - get_page(skb_shinfo(skb)->frags[i].page); - skb_shinfo(skb)->frags[i].page = - skb_shinfo(skb)->frags[i - 1].page; - skb_shinfo(skb)->frags[i].page_offset += - skb_shinfo(skb)->frags[i - 1].size; - skb_shinfo(skb)->frags[i].size = rem; - i++; - skb_shinfo(skb)->nr_frags = i; - } } /* Stamp the time, and sequence number, -- cgit v1.1 From 8ae6daca85c8bbd6a32c382db5e2a2a989f8bed2 Mon Sep 17 00:00:00 2001 From: David Decotigny Date: Wed, 27 Apr 2011 18:32:38 +0000 Subject: ethtool: Call ethtool's get/set_settings callbacks with cleaned data This makes sure that when a driver calls the ethtool's get/set_settings() callback of another driver, the data passed to it is clean. This guarantees that speed_hi will be zeroed correctly if the called callback doesn't explicitely set it: we are sure we don't get a corrupted speed from the underlying driver. We also take care of setting the cmd field appropriately (ETHTOOL_GSET/SSET). This applies to dev_ethtool_get_settings(), which now makes sure it sets up that ethtool command parameter correctly before passing it to drivers. This also means that whoever calls dev_ethtool_get_settings() does not have to clean the ethtool command parameter. This function also becomes an exported symbol instead of an inline. All drivers visible to make allyesconfig under x86_64 have been updated. Signed-off-by: David Decotigny Signed-off-by: David S. Miller --- net/core/dev.c | 24 ++++++++++++++++++++++++ net/core/net-sysfs.c | 24 ++++++++++-------------- 2 files changed, 34 insertions(+), 14 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 7db99b5..e95dc30 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4496,6 +4496,30 @@ void dev_set_rx_mode(struct net_device *dev) } /** + * dev_ethtool_get_settings - call device's ethtool_ops::get_settings() + * @dev: device + * @cmd: memory area for ethtool_ops::get_settings() result + * + * The cmd arg is initialized properly (cleared and + * ethtool_cmd::cmd field set to ETHTOOL_GSET). + * + * Return device's ethtool_ops::get_settings() result value or + * -EOPNOTSUPP when device doesn't expose + * ethtool_ops::get_settings() operation. + */ +int dev_ethtool_get_settings(struct net_device *dev, + struct ethtool_cmd *cmd) +{ + if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) + return -EOPNOTSUPP; + + memset(cmd, 0, sizeof(struct ethtool_cmd)); + cmd->cmd = ETHTOOL_GSET; + return dev->ethtool_ops->get_settings(dev, cmd); +} +EXPORT_SYMBOL(dev_ethtool_get_settings); + +/** * dev_get_flags - get flags reported to userspace * @dev: device * diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 5ceb257..381813e 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -28,6 +28,7 @@ static const char fmt_hex[] = "%#x\n"; static const char fmt_long_hex[] = "%#lx\n"; static const char fmt_dec[] = "%d\n"; +static const char fmt_udec[] = "%u\n"; static const char fmt_ulong[] = "%lu\n"; static const char fmt_u64[] = "%llu\n"; @@ -145,13 +146,10 @@ static ssize_t show_speed(struct device *dev, if (!rtnl_trylock()) return restart_syscall(); - if (netif_running(netdev) && - netdev->ethtool_ops && - netdev->ethtool_ops->get_settings) { - struct ethtool_cmd cmd = { ETHTOOL_GSET }; - - if (!netdev->ethtool_ops->get_settings(netdev, &cmd)) - ret = sprintf(buf, fmt_dec, ethtool_cmd_speed(&cmd)); + if (netif_running(netdev)) { + struct ethtool_cmd cmd; + if (!dev_ethtool_get_settings(netdev, &cmd)) + ret = sprintf(buf, fmt_udec, ethtool_cmd_speed(&cmd)); } rtnl_unlock(); return ret; @@ -166,13 +164,11 @@ static ssize_t show_duplex(struct device *dev, if (!rtnl_trylock()) return restart_syscall(); - if (netif_running(netdev) && - netdev->ethtool_ops && - netdev->ethtool_ops->get_settings) { - struct ethtool_cmd cmd = { ETHTOOL_GSET }; - - if (!netdev->ethtool_ops->get_settings(netdev, &cmd)) - ret = sprintf(buf, "%s\n", cmd.duplex ? "full" : "half"); + if (netif_running(netdev)) { + struct ethtool_cmd cmd; + if (!dev_ethtool_get_settings(netdev, &cmd)) + ret = sprintf(buf, "%s\n", + cmd.duplex ? "full" : "half"); } rtnl_unlock(); return ret; -- cgit v1.1 From e67f88dd12f610da98ca838822f2c9b4e7c6100e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Apr 2011 22:56:07 +0000 Subject: net: dont hold rtnl mutex during netlink dump callbacks Four years ago, Patrick made a change to hold rtnl mutex during netlink dump callbacks. I believe it was a wrong move. This slows down concurrent dumps, making good old /proc/net/ files faster than rtnetlink in some situations. This occurred to me because one "ip link show dev ..." was _very_ slow on a workload adding/removing network devices in background. All dump callbacks are able to use RCU locking now, so this patch does roughly a revert of commits : 1c2d670f366 : [RTNETLINK]: Hold rtnl_mutex during netlink dump callbacks 6313c1e0992 : [RTNETLINK]: Remove unnecessary locking in dump callbacks This let writers fight for rtnl mutex and readers going full speed. It also takes care of phonet : phonet_route_get() is now called from rcu read section. I renamed it to phonet_route_get_rcu() Signed-off-by: Eric Dumazet Cc: Patrick McHardy Cc: Remi Denis-Courmont Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/fib_rules.c | 3 ++- net/core/rtnetlink.c | 12 +++++------- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'net/core') diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 8248ebb..3911586 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -590,7 +590,8 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb, int idx = 0; struct fib_rule *rule; - list_for_each_entry(rule, &ops->rules_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(rule, &ops->rules_list, list) { if (idx < cb->args[1]) goto skip; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d7c4bb4..29633125 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1007,10 +1007,11 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) s_h = cb->args[0]; s_idx = cb->args[1]; + rcu_read_lock(); for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; - hlist_for_each_entry(dev, node, head, index_hlist) { + hlist_for_each_entry_rcu(dev, node, head, index_hlist) { if (idx < s_idx) goto cont; if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, @@ -1023,6 +1024,7 @@ cont: } } out: + rcu_read_unlock(); cb->args[1] = idx; cb->args[0] = h; @@ -1879,7 +1881,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) int min_len; int family; int type; - int err; type = nlh->nlmsg_type; if (type > RTM_MAX) @@ -1906,11 +1907,8 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (dumpit == NULL) return -EOPNOTSUPP; - __rtnl_unlock(); rtnl = net->rtnl; - err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL); - rtnl_lock(); - return err; + return netlink_dump_start(rtnl, skb, nlh, dumpit, NULL); } memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *))); @@ -1980,7 +1978,7 @@ static int __net_init rtnetlink_net_init(struct net *net) { struct sock *sk; sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX, - rtnetlink_rcv, &rtnl_mutex, THIS_MODULE); + rtnetlink_rcv, NULL, THIS_MODULE); if (!sk) return -ENOMEM; net->rtnl = sk; -- cgit v1.1 From 1c5cae815d19ffe02bdfda1260949ef2b1806171 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 30 Apr 2011 01:21:32 +0000 Subject: net: call dev_alloc_name from register_netdevice Force dev_alloc_name() to be called from register_netdevice() by dev_get_valid_name(). That allows to remove multiple explicit dev_alloc_name() calls. The possibility to call dev_alloc_name in advance remains. This also fixes veth creation regresion caused by 84c49d8c3e4abefb0a41a77b25aa37ebe8d6b743 Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev.c | 24 ++++++------------------ net/core/rtnetlink.c | 8 -------- 2 files changed, 6 insertions(+), 26 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index e95dc30..3b79bad 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -948,7 +948,7 @@ int dev_alloc_name(struct net_device *dev, const char *name) } EXPORT_SYMBOL(dev_alloc_name); -static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt) +static int dev_get_valid_name(struct net_device *dev, const char *name) { struct net *net; @@ -958,7 +958,7 @@ static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt if (!dev_valid_name(name)) return -EINVAL; - if (fmt && strchr(name, '%')) + if (strchr(name, '%')) return dev_alloc_name(dev, name); else if (__dev_get_by_name(net, name)) return -EEXIST; @@ -995,7 +995,7 @@ int dev_change_name(struct net_device *dev, const char *newname) memcpy(oldname, dev->name, IFNAMSIZ); - err = dev_get_valid_name(dev, newname, 1); + err = dev_get_valid_name(dev, newname); if (err < 0) return err; @@ -5420,8 +5420,8 @@ int register_netdevice(struct net_device *dev) } } - ret = dev_get_valid_name(dev, dev->name, 0); - if (ret) + ret = dev_get_valid_name(dev, dev->name); + if (ret < 0) goto err_uninit; dev->ifindex = dev_new_index(net); @@ -5562,19 +5562,7 @@ int register_netdev(struct net_device *dev) int err; rtnl_lock(); - - /* - * If the name is a format string the caller wants us to do a - * name allocation. - */ - if (strchr(dev->name, '%')) { - err = dev_alloc_name(dev, dev->name); - if (err < 0) - goto out; - } - err = register_netdevice(dev); -out: rtnl_unlock(); return err; } @@ -6056,7 +6044,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* We get here if we can't use the current device name */ if (!pat) goto out; - if (dev_get_valid_name(dev, pat, 1)) + if (dev_get_valid_name(dev, pat) < 0) goto out; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 29633125..5a160f4 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1572,12 +1572,6 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net, dev->rtnl_link_state = RTNL_LINK_INITIALIZING; dev->real_num_tx_queues = real_num_queues; - if (strchr(dev->name, '%')) { - err = dev_alloc_name(dev, dev->name); - if (err < 0) - goto err_free; - } - if (tb[IFLA_MTU]) dev->mtu = nla_get_u32(tb[IFLA_MTU]); if (tb[IFLA_ADDRESS]) @@ -1597,8 +1591,6 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net, return dev; -err_free: - free_netdev(dev); err: return ERR_PTR(err); } -- cgit v1.1 From 1ab7b6ac2709d0eb05a7144cd0e14faa3a7ea162 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 14 Apr 2011 23:46:06 -0700 Subject: ethtool: remove phys_id from ethtool_ops After that all the upstream kernel drivers now use phys_id, and the old ethtool_ops interface (phys_id) can be removed. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/ethtool.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'net/core') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index d8b1a8d..927819d 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1655,7 +1655,7 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) static bool busy; int rc; - if (!dev->ethtool_ops->set_phys_id && !dev->ethtool_ops->phys_id) + if (!dev->ethtool_ops->set_phys_id) return -EOPNOTSUPP; if (busy) @@ -1664,10 +1664,6 @@ static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) if (copy_from_user(&id, useraddr, sizeof(id))) return -EFAULT; - if (!dev->ethtool_ops->set_phys_id) - /* Do it the old way */ - return dev->ethtool_ops->phys_id(dev, id.data); - rc = dev->ethtool_ops->set_phys_id(dev, ETHTOOL_ID_ACTIVE); if (rc < 0) return rc; -- cgit v1.1 From 47a0200d535f08ac8f784a709c48995ca0b10288 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 3 May 2011 11:23:40 +0000 Subject: pktgen: use %pI6c for printing IPv6 addresses I don't know why %pI6 doesn't compress, but the format specifier is kernel-standard, so use it. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/core/pktgen.c | 109 +++++++----------------------------------------------- 1 file changed, 13 insertions(+), 96 deletions(-) (limited to 'net/core') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index ff79d94..d41d88b 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -449,7 +449,6 @@ static void pktgen_stop(struct pktgen_thread *t); static void pktgen_clear_counters(struct pktgen_dev *pkt_dev); static unsigned int scan_ip6(const char *s, char ip[16]); -static unsigned int fmt_ip6(char *s, const char ip[16]); /* Module parameters, defaults. */ static int pg_count_d __read_mostly = 1000; @@ -556,21 +555,13 @@ static int pktgen_if_show(struct seq_file *seq, void *v) pkt_dev->skb_priority); if (pkt_dev->flags & F_IPV6) { - char b1[128], b2[128], b3[128]; - fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr); - fmt_ip6(b2, pkt_dev->min_in6_saddr.s6_addr); - fmt_ip6(b3, pkt_dev->max_in6_saddr.s6_addr); seq_printf(seq, - " saddr: %s min_saddr: %s max_saddr: %s\n", b1, - b2, b3); - - fmt_ip6(b1, pkt_dev->in6_daddr.s6_addr); - fmt_ip6(b2, pkt_dev->min_in6_daddr.s6_addr); - fmt_ip6(b3, pkt_dev->max_in6_daddr.s6_addr); - seq_printf(seq, - " daddr: %s min_daddr: %s max_daddr: %s\n", b1, - b2, b3); - + " saddr: %pI6c min_saddr: %pI6c max_saddr: %pI6c\n" + " daddr: %pI6c min_daddr: %pI6c max_daddr: %pI6c\n", + &pkt_dev->in6_saddr, + &pkt_dev->min_in6_saddr, &pkt_dev->max_in6_saddr, + &pkt_dev->in6_daddr, + &pkt_dev->min_in6_daddr, &pkt_dev->max_in6_daddr); } else { seq_printf(seq, " dst_min: %s dst_max: %s\n", @@ -706,10 +697,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v) pkt_dev->cur_src_mac_offset); if (pkt_dev->flags & F_IPV6) { - char b1[128], b2[128]; - fmt_ip6(b1, pkt_dev->cur_in6_daddr.s6_addr); - fmt_ip6(b2, pkt_dev->cur_in6_saddr.s6_addr); - seq_printf(seq, " cur_saddr: %s cur_daddr: %s\n", b2, b1); + seq_printf(seq, " cur_saddr: %pI6c cur_daddr: %pI6c\n", + &pkt_dev->cur_in6_saddr, + &pkt_dev->cur_in6_daddr); } else seq_printf(seq, " cur_saddr: 0x%x cur_daddr: 0x%x\n", pkt_dev->cur_saddr, pkt_dev->cur_daddr); @@ -1309,7 +1299,7 @@ static ssize_t pktgen_if_write(struct file *file, buf[len] = 0; scan_ip6(buf, pkt_dev->in6_daddr.s6_addr); - fmt_ip6(buf, pkt_dev->in6_daddr.s6_addr); + snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_daddr); ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->in6_daddr); @@ -1332,7 +1322,7 @@ static ssize_t pktgen_if_write(struct file *file, buf[len] = 0; scan_ip6(buf, pkt_dev->min_in6_daddr.s6_addr); - fmt_ip6(buf, pkt_dev->min_in6_daddr.s6_addr); + snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->min_in6_daddr); ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->min_in6_daddr); @@ -1355,7 +1345,7 @@ static ssize_t pktgen_if_write(struct file *file, buf[len] = 0; scan_ip6(buf, pkt_dev->max_in6_daddr.s6_addr); - fmt_ip6(buf, pkt_dev->max_in6_daddr.s6_addr); + snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->max_in6_daddr); if (debug) printk(KERN_DEBUG "pktgen: dst6_max set to: %s\n", buf); @@ -1376,7 +1366,7 @@ static ssize_t pktgen_if_write(struct file *file, buf[len] = 0; scan_ip6(buf, pkt_dev->in6_saddr.s6_addr); - fmt_ip6(buf, pkt_dev->in6_saddr.s6_addr); + snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_saddr); ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &pkt_dev->in6_saddr); @@ -2898,79 +2888,6 @@ static unsigned int scan_ip6(const char *s, char ip[16]) return len; } -static char tohex(char hexdigit) -{ - return hexdigit > 9 ? hexdigit + 'a' - 10 : hexdigit + '0'; -} - -static int fmt_xlong(char *s, unsigned int i) -{ - char *bak = s; - *s = tohex((i >> 12) & 0xf); - if (s != bak || *s != '0') - ++s; - *s = tohex((i >> 8) & 0xf); - if (s != bak || *s != '0') - ++s; - *s = tohex((i >> 4) & 0xf); - if (s != bak || *s != '0') - ++s; - *s = tohex(i & 0xf); - return s - bak + 1; -} - -static unsigned int fmt_ip6(char *s, const char ip[16]) -{ - unsigned int len; - unsigned int i; - unsigned int temp; - unsigned int compressing; - int j; - - len = 0; - compressing = 0; - for (j = 0; j < 16; j += 2) { - -#ifdef V4MAPPEDPREFIX - if (j == 12 && !memcmp(ip, V4mappedprefix, 12)) { - inet_ntoa_r(*(struct in_addr *)(ip + 12), s); - temp = strlen(s); - return len + temp; - } -#endif - temp = ((unsigned long)(unsigned char)ip[j] << 8) + - (unsigned long)(unsigned char)ip[j + 1]; - if (temp == 0) { - if (!compressing) { - compressing = 1; - if (j == 0) { - *s++ = ':'; - ++len; - } - } - } else { - if (compressing) { - compressing = 0; - *s++ = ':'; - ++len; - } - i = fmt_xlong(s, temp); - len += i; - s += i; - if (j < 14) { - *s++ = ':'; - ++len; - } - } - } - if (compressing) { - *s++ = ':'; - ++len; - } - *s = 0; - return len; -} - static struct sk_buff *fill_packet_ipv6(struct net_device *odev, struct pktgen_dev *pkt_dev) { -- cgit v1.1 From eed2a12f1ed9aabf0676f4d0db34aad51976c5c6 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Wed, 4 May 2011 15:30:11 +0000 Subject: net: Allow ethtool to set interface in loopback mode. This patch enables ethtool to set the loopback mode on a given interface. By configuring the interface in loopback mode in conjunction with a policy route / rule, a userland application can stress the egress / ingress path exposing the flows of the change in progress and potentially help developer(s) understand the impact of those changes without even sending a packet out on the network. Following set of commands illustrates one such example - a) ip -4 addr add 192.168.1.1/24 dev eth1 b) ip -4 rule add from all iif eth1 lookup 250 c) ip -4 route add local 0/0 dev lo proto kernel scope host table 250 d) arp -Ds 192.168.1.100 eth1 e) arp -Ds 192.168.1.200 eth1 f) sysctl -w net.ipv4.ip_nonlocal_bind=1 g) sysctl -w net.ipv4.conf.all.accept_local=1 # Assuming that the machine has 8 cores h) taskset 000f netserver -L 192.168.1.200 i) taskset 00f0 netperf -t TCP_CRR -L 192.168.1.100 -H 192.168.1.200 -l 30 Signed-off-by: Mahesh Bandewar Acked-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 927819d..b6f4058 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -362,7 +362,7 @@ static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GS /* NETIF_F_RXHASH */ "rx-hashing", /* NETIF_F_RXCSUM */ "rx-checksum", /* NETIF_F_NOCACHE_COPY */ "tx-nocache-copy" - "", + /* NETIF_F_LOOPBACK */ "loopback", }; static int __ethtool_get_sset_count(struct net_device *dev, int sset) -- cgit v1.1 From 226bd3411471af42f7edbdfaf73f2d54ebb62a66 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 May 2011 23:17:57 +0000 Subject: net: use batched device unregister in veth and macvlan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit veth devices dont use the batched device unregisters yet. Since veth are a pair of devices, it makes sense to use a batch of two unregisters, this roughly divides dismantle time by two. Fix this by changing dellink() callers to always provide a non NULL head. (Idea from Michał Mirosław) This patch also handles macvlan case : We now dismantle all macvlans on top of a lower dev at once. Reported-by: Alex Bligh Signed-off-by: Eric Dumazet Cc: Michał Mirosław Cc: Jesse Gross Cc: Paul E. McKenney Cc: Ben Greear Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5a160f4..d2ba259 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1501,6 +1501,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) char ifname[IFNAMSIZ]; struct nlattr *tb[IFLA_MAX+1]; int err; + LIST_HEAD(list_kill); err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); if (err < 0) @@ -1524,7 +1525,9 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (!ops) return -EOPNOTSUPP; - ops->dellink(dev, NULL); + ops->dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); + list_del(&list_kill); return 0; } -- cgit v1.1 From 4940fc889e1e63667a15243028ddcd84d471cd8e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 7 May 2011 23:00:07 +0000 Subject: net: add mac_pton() for parsing MAC address mac_pton() parses MAC address in form XX:XX:XX:XX:XX:XX and only in that form. mac_pton() doesn't dirty result until it's sure string representation is valid. mac_pton() doesn't care about characters _after_ last octet, it's up to caller to deal with it. mac_pton() diverges from 0/-E return value convention. Target usage: if (!mac_pton(str, whatever->mac)) return -EINVAL; /* ->mac being u8 [ETH_ALEN] is filled at this point. */ /* optionally check str[3 * ETH_ALEN - 1] for termination */ Use mac_pton() in pktgen and netconsole for start. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/core/netpoll.c | 26 +------------------------- net/core/pktgen.c | 53 ++++++++--------------------------------------------- net/core/utils.c | 24 ++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 70 deletions(-) (limited to 'net/core') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 46d9c3a..2d7d6d4 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -698,32 +698,8 @@ int netpoll_parse_options(struct netpoll *np, char *opt) if (*cur != 0) { /* MAC address */ - if ((delim = strchr(cur, ':')) == NULL) + if (!mac_pton(cur, np->remote_mac)) goto parse_failed; - *delim = 0; - np->remote_mac[0] = simple_strtol(cur, NULL, 16); - cur = delim + 1; - if ((delim = strchr(cur, ':')) == NULL) - goto parse_failed; - *delim = 0; - np->remote_mac[1] = simple_strtol(cur, NULL, 16); - cur = delim + 1; - if ((delim = strchr(cur, ':')) == NULL) - goto parse_failed; - *delim = 0; - np->remote_mac[2] = simple_strtol(cur, NULL, 16); - cur = delim + 1; - if ((delim = strchr(cur, ':')) == NULL) - goto parse_failed; - *delim = 0; - np->remote_mac[3] = simple_strtol(cur, NULL, 16); - cur = delim + 1; - if ((delim = strchr(cur, ':')) == NULL) - goto parse_failed; - *delim = 0; - np->remote_mac[4] = simple_strtol(cur, NULL, 16); - cur = delim + 1; - np->remote_mac[5] = simple_strtol(cur, NULL, 16); } netpoll_print_options(np); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index d41d88b..379270f 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1420,11 +1420,6 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "dst_mac")) { - char *v = valstr; - unsigned char old_dmac[ETH_ALEN]; - unsigned char *m = pkt_dev->dst_mac; - memcpy(old_dmac, pkt_dev->dst_mac, ETH_ALEN); - len = strn_len(&user_buffer[i], sizeof(valstr) - 1); if (len < 0) return len; @@ -1432,35 +1427,16 @@ static ssize_t pktgen_if_write(struct file *file, memset(valstr, 0, sizeof(valstr)); if (copy_from_user(valstr, &user_buffer[i], len)) return -EFAULT; - i += len; - - for (*m = 0; *v && m < pkt_dev->dst_mac + 6; v++) { - int value; - - value = hex_to_bin(*v); - if (value >= 0) - *m = *m * 16 + value; - - if (*v == ':') { - m++; - *m = 0; - } - } + if (!mac_pton(valstr, pkt_dev->dst_mac)) + return -EINVAL; /* Set up Dest MAC */ - if (compare_ether_addr(old_dmac, pkt_dev->dst_mac)) - memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, ETH_ALEN); + memcpy(&pkt_dev->hh[0], pkt_dev->dst_mac, ETH_ALEN); - sprintf(pg_result, "OK: dstmac"); + sprintf(pg_result, "OK: dstmac %pM", pkt_dev->dst_mac); return count; } if (!strcmp(name, "src_mac")) { - char *v = valstr; - unsigned char old_smac[ETH_ALEN]; - unsigned char *m = pkt_dev->src_mac; - - memcpy(old_smac, pkt_dev->src_mac, ETH_ALEN); - len = strn_len(&user_buffer[i], sizeof(valstr) - 1); if (len < 0) return len; @@ -1468,26 +1444,13 @@ static ssize_t pktgen_if_write(struct file *file, memset(valstr, 0, sizeof(valstr)); if (copy_from_user(valstr, &user_buffer[i], len)) return -EFAULT; - i += len; - - for (*m = 0; *v && m < pkt_dev->src_mac + 6; v++) { - int value; - - value = hex_to_bin(*v); - if (value >= 0) - *m = *m * 16 + value; - - if (*v == ':') { - m++; - *m = 0; - } - } + if (!mac_pton(valstr, pkt_dev->src_mac)) + return -EINVAL; /* Set up Src MAC */ - if (compare_ether_addr(old_smac, pkt_dev->src_mac)) - memcpy(&(pkt_dev->hh[6]), pkt_dev->src_mac, ETH_ALEN); + memcpy(&pkt_dev->hh[6], pkt_dev->src_mac, ETH_ALEN); - sprintf(pg_result, "OK: srcmac"); + sprintf(pg_result, "OK: srcmac %pM", pkt_dev->src_mac); return count; } diff --git a/net/core/utils.c b/net/core/utils.c index 5fea0ab..2012bc7 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -296,3 +296,27 @@ void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, csum_unfold(*sum))); } EXPORT_SYMBOL(inet_proto_csum_replace4); + +int mac_pton(const char *s, u8 *mac) +{ + int i; + + /* XX:XX:XX:XX:XX:XX */ + if (strlen(s) < 3 * ETH_ALEN - 1) + return 0; + + /* Don't dirty result unless string is valid MAC. */ + for (i = 0; i < ETH_ALEN; i++) { + if (!strchr("0123456789abcdefABCDEF", s[i * 3])) + return 0; + if (!strchr("0123456789abcdefABCDEF", s[i * 3 + 1])) + return 0; + if (i != ETH_ALEN - 1 && s[i * 3 + 2] != ':') + return 0; + } + for (i = 0; i < ETH_ALEN; i++) { + mac[i] = (hex_to_bin(s[i * 3]) << 4) | hex_to_bin(s[i * 3 + 1]); + } + return 1; +} +EXPORT_SYMBOL(mac_pton); -- cgit v1.1 From 864b5418ebc16893cfd27ff3cda254eff0f56d6f Mon Sep 17 00:00:00 2001 From: Franco Fichtner Date: Thu, 12 May 2011 06:42:04 +0000 Subject: ethtool: bring back missing comma in netdev features strings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The issue was introduced in commit eed2a12f1ed9aabf. Signed-off-by: Franco Fichtner Acked-by: Michał Mirosław Acked-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index b6f4058..b8c2b10 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -361,7 +361,7 @@ static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GS /* NETIF_F_NTUPLE */ "rx-ntuple-filter", /* NETIF_F_RXHASH */ "rx-hashing", /* NETIF_F_RXCSUM */ "rx-checksum", - /* NETIF_F_NOCACHE_COPY */ "tx-nocache-copy" + /* NETIF_F_NOCACHE_COPY */ "tx-nocache-copy", /* NETIF_F_LOOPBACK */ "loopback", }; -- cgit v1.1 From afe12cc86b0ba545a01ad8716539ab07ab6e9e89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Sat, 7 May 2011 03:22:17 +0000 Subject: net: introduce netdev_change_features() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It will be needed by bonding and other drivers changing vlan_features after ndo_init callback. As a bonus, this includes kernel-doc for netdev_update_features(). Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/dev.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 75898a3..ea23353 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5289,6 +5289,14 @@ int __netdev_update_features(struct net_device *dev) return 1; } +/** + * netdev_update_features - recalculate device features + * @dev: the device to check + * + * Recalculate dev->features set and send notifications if it + * has changed. Should be called after driver or hardware dependent + * conditions might have changed that influence the features. + */ void netdev_update_features(struct net_device *dev) { if (__netdev_update_features(dev)) @@ -5297,6 +5305,23 @@ void netdev_update_features(struct net_device *dev) EXPORT_SYMBOL(netdev_update_features); /** + * netdev_change_features - recalculate device features + * @dev: the device to check + * + * Recalculate dev->features set and send notifications even + * if they have not changed. Should be called instead of + * netdev_update_features() if also dev->vlan_features might + * have changed to allow the changes to be propagated to stacked + * VLAN devices. + */ +void netdev_change_features(struct net_device *dev) +{ + __netdev_update_features(dev); + netdev_features_change(dev); +} +EXPORT_SYMBOL(netdev_change_features); + +/** * netif_stacked_transfer_operstate - transfer operstate * @rootdev: the root or lower level device to transfer state from * @dev: the device to transfer operstate to -- cgit v1.1 From 29dd54b72ba8c5ad0dd6dd33584449b5953f700b Mon Sep 17 00:00:00 2001 From: Anirban Chakraborty Date: Thu, 12 May 2011 12:48:32 +0000 Subject: ethtool: Added support for FW dump Added code to take FW dump via ethtool. Dump level can be controlled via setting the dump flag. A get function is provided to query the current setting of the dump flag. Dump data is obtained from the driver via a separate get function. Changes from v3: Fixed buffer length issue in ethtool_get_dump_data function. Updated kernel doc for ethtool_dump struct and get_dump_flag function. Changes from v2: Provided separate commands for get flag and data. Check for minimum of the two buffer length obtained via ethtool and driver and use that for dump buffer Pass up the driver return error codes up to the caller. Added kernel doc comments. Signed-off-by: Anirban Chakraborty Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/ethtool.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) (limited to 'net/core') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index b8c2b10..b8c2bcf 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1823,6 +1823,87 @@ static noinline_for_stack int ethtool_flash_device(struct net_device *dev, return dev->ethtool_ops->flash_device(dev, &efl); } +static int ethtool_set_dump(struct net_device *dev, + void __user *useraddr) +{ + struct ethtool_dump dump; + + if (!dev->ethtool_ops->set_dump) + return -EOPNOTSUPP; + + if (copy_from_user(&dump, useraddr, sizeof(dump))) + return -EFAULT; + + return dev->ethtool_ops->set_dump(dev, &dump); +} + +static int ethtool_get_dump_flag(struct net_device *dev, + void __user *useraddr) +{ + int ret; + struct ethtool_dump dump; + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (!dev->ethtool_ops->get_dump_flag) + return -EOPNOTSUPP; + + if (copy_from_user(&dump, useraddr, sizeof(dump))) + return -EFAULT; + + ret = ops->get_dump_flag(dev, &dump); + if (ret) + return ret; + + if (copy_to_user(useraddr, &dump, sizeof(dump))) + return -EFAULT; + return 0; +} + +static int ethtool_get_dump_data(struct net_device *dev, + void __user *useraddr) +{ + int ret; + __u32 len; + struct ethtool_dump dump, tmp; + const struct ethtool_ops *ops = dev->ethtool_ops; + void *data = NULL; + + if (!dev->ethtool_ops->get_dump_data || + !dev->ethtool_ops->get_dump_flag) + return -EOPNOTSUPP; + + if (copy_from_user(&dump, useraddr, sizeof(dump))) + return -EFAULT; + + memset(&tmp, 0, sizeof(tmp)); + tmp.cmd = ETHTOOL_GET_DUMP_FLAG; + ret = ops->get_dump_flag(dev, &tmp); + if (ret) + return ret; + + len = (tmp.len > dump.len) ? dump.len : tmp.len; + if (!len) + return -EFAULT; + + data = vzalloc(tmp.len); + if (!data) + return -ENOMEM; + ret = ops->get_dump_data(dev, &dump, data); + if (ret) + goto out; + + if (copy_to_user(useraddr, &dump, sizeof(dump))) { + ret = -EFAULT; + goto out; + } + useraddr += offsetof(struct ethtool_dump, data); + if (copy_to_user(useraddr, data, len)) + ret = -EFAULT; +out: + vfree(data); + return ret; +} + /* The main entry point in this file. Called from net/core/dev.c */ int dev_ethtool(struct net *net, struct ifreq *ifr) @@ -2039,6 +2120,15 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SCHANNELS: rc = ethtool_set_channels(dev, useraddr); break; + case ETHTOOL_SET_DUMP: + rc = ethtool_set_dump(dev, useraddr); + break; + case ETHTOOL_GET_DUMP_FLAG: + rc = ethtool_get_dump_flag(dev, useraddr); + break; + case ETHTOOL_GET_DUMP_DATA: + rc = ethtool_get_dump_data(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } -- cgit v1.1 From 0696c3a8acd3b7c3186dd231d65d97e05a75189f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20Pan=28=E6=BD=98=E5=8D=AB=E5=B9=B3=29?= Date: Thu, 12 May 2011 15:46:56 +0000 Subject: net:set valid name before calling ndo_init() In commit 1c5cae815d19 (net: call dev_alloc_name from register_netdevice), a bug of bonding was involved, see example 1 and 2. In register_netdevice(), the name of net_device is not valid until dev_get_valid_name() is called. But dev->netdev_ops->ndo_init(that is bond_init) is called before dev_get_valid_name(), and it uses the invalid name of net_device. I think register_netdevice() should make sure that the name of net_device is valid before calling ndo_init(). example 1: modprobe bonding ls /proc/net/bonding/bond%d ps -eLf root 3398 2 3398 0 1 21:34 ? 00:00:00 [bond%d] example 2: modprobe bonding max_bonds=3 [ 170.100292] bonding: Ethernet Channel Bonding Driver: v3.7.1 (April 27, 2011) [ 170.101090] bonding: Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details. [ 170.102469] ------------[ cut here ]------------ [ 170.103150] WARNING: at /home/pwp/net-next-2.6/fs/proc/generic.c:586 proc_register+0x126/0x157() [ 170.104075] Hardware name: VirtualBox [ 170.105065] proc_dir_entry 'bonding/bond%d' already registered [ 170.105613] Modules linked in: bonding(+) sunrpc ipv6 uinput microcode ppdev parport_pc parport joydev e1000 pcspkr i2c_piix4 i2c_core [last unloaded: bonding] [ 170.108397] Pid: 3457, comm: modprobe Not tainted 2.6.39-rc2+ #14 [ 170.108935] Call Trace: [ 170.109382] [] warn_slowpath_common+0x6a/0x7f [ 170.109911] [] ? proc_register+0x126/0x157 [ 170.110329] [] warn_slowpath_fmt+0x2b/0x2f [ 170.110846] [] proc_register+0x126/0x157 [ 170.111870] [] proc_create_data+0x82/0x98 [ 170.112335] [] bond_create_proc_entry+0x3f/0x73 [bonding] [ 170.112905] [] bond_init+0x77/0xa5 [bonding] [ 170.113319] [] register_netdevice+0x8c/0x1d3 [ 170.113848] [] bond_create+0x6c/0x90 [bonding] [ 170.114322] [] bonding_init+0x763/0x7b1 [bonding] [ 170.114879] [] do_one_initcall+0x76/0x122 [ 170.115317] [] ? 0xf94f3fff [ 170.115799] [] sys_init_module+0x1286/0x140d [ 170.116879] [] sysenter_do_call+0x12/0x28 [ 170.117404] ---[ end trace 64e4fac3ae5fff1a ]--- [ 170.117924] bond%d: Warning: failed to register to debugfs [ 170.128728] ------------[ cut here ]------------ [ 170.129360] WARNING: at /home/pwp/net-next-2.6/fs/proc/generic.c:586 proc_register+0x126/0x157() [ 170.130323] Hardware name: VirtualBox [ 170.130797] proc_dir_entry 'bonding/bond%d' already registered [ 170.131315] Modules linked in: bonding(+) sunrpc ipv6 uinput microcode ppdev parport_pc parport joydev e1000 pcspkr i2c_piix4 i2c_core [last unloaded: bonding] [ 170.133731] Pid: 3457, comm: modprobe Tainted: G W 2.6.39-rc2+ #14 [ 170.134308] Call Trace: [ 170.134743] [] warn_slowpath_common+0x6a/0x7f [ 170.135305] [] ? proc_register+0x126/0x157 [ 170.135820] [] warn_slowpath_fmt+0x2b/0x2f [ 170.137168] [] proc_register+0x126/0x157 [ 170.137700] [] proc_create_data+0x82/0x98 [ 170.138174] [] bond_create_proc_entry+0x3f/0x73 [bonding] [ 170.138745] [] bond_init+0x77/0xa5 [bonding] [ 170.139278] [] register_netdevice+0x8c/0x1d3 [ 170.139828] [] bond_create+0x6c/0x90 [bonding] [ 170.140361] [] bonding_init+0x763/0x7b1 [bonding] [ 170.140927] [] do_one_initcall+0x76/0x122 [ 170.141494] [] ? 0xf94f3fff [ 170.141975] [] sys_init_module+0x1286/0x140d [ 170.142463] [] sysenter_do_call+0x12/0x28 [ 170.142974] ---[ end trace 64e4fac3ae5fff1b ]--- [ 170.144949] bond%d: Warning: failed to register to debugfs Signed-off-by: Weiping Pan Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index ea23353..3ed09f8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5437,6 +5437,10 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; + ret = dev_get_valid_name(dev, dev->name); + if (ret < 0) + goto out; + /* Init, if this function is available */ if (dev->netdev_ops->ndo_init) { ret = dev->netdev_ops->ndo_init(dev); @@ -5447,10 +5451,6 @@ int register_netdevice(struct net_device *dev) } } - ret = dev_get_valid_name(dev, dev->name); - if (ret < 0) - goto err_uninit; - dev->ifindex = dev_new_index(net); if (dev->iflink == -1) dev->iflink = dev->ifindex; -- cgit v1.1 From 2142c131a3e290ae350f8a0b0d354c0585a96df1 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Mon, 16 May 2011 11:53:49 -0700 Subject: net: convert to new cpumask API We plan to remove cpu_xx() old api later. Thus this patch convert it. This patch has no functional change. Signed-off-by: KOSAKI Motohiro Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 381813e..1b12217 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -964,7 +964,7 @@ static ssize_t store_xps_map(struct netdev_queue *queue, } else pos = map_len = alloc_len = 0; - need_set = cpu_isset(cpu, *mask) && cpu_online(cpu); + need_set = cpumask_test_cpu(cpu, mask) && cpu_online(cpu); #ifdef CONFIG_NUMA if (need_set) { if (numa_node == -2) -- cgit v1.1 From 372b2312010bece1e36f577d6c99a6193ec54cbd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 May 2011 13:56:59 -0400 Subject: net: use hlist_del_rcu() in dev_change_name() Using plain hlist_del() in dev_change_name() is wrong since a concurrent reader can crash trying to dereference LIST_POISON1. Bug introduced in commit 72c9528bab94 (net: Introduce dev_get_by_name_rcu()) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index b624fe4..30a4078 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1007,7 +1007,7 @@ rollback: } write_lock_bh(&dev_base_lock); - hlist_del(&dev->name_hlist); + hlist_del_rcu(&dev->name_hlist); write_unlock_bh(&dev_base_lock); synchronize_rcu(); -- cgit v1.1 From 604ae14ffb6d75d6eef4757859226b758d6bf9e3 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 16 May 2011 10:37:39 +0000 Subject: net: Change netdev_fix_features messages loglevel Cool, how about we make 'Features changed' debug as well? This way userspace can't fill up the log just by tweaking tun features with an ioctl. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 30a4078..acd7423 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5258,7 +5258,7 @@ void netdev_update_features(struct net_device *dev) if (dev->features == features) return; - netdev_info(dev, "Features changed: 0x%08x -> 0x%08x\n", + netdev_dbg(dev, "Features changed: 0x%08x -> 0x%08x\n", dev->features, features); if (dev->netdev_ops->ndo_set_features) -- cgit v1.1 From 7cc31a9ae1477abc79d5992b3afe889f25c50c99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 17 May 2011 16:50:02 -0400 Subject: net: ethtool: fix IPV6 checksum feature name string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 74ead9e..f337525 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -330,7 +330,7 @@ static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GS /* NETIF_F_IP_CSUM */ "tx-checksum-ipv4", /* NETIF_F_NO_CSUM */ "tx-checksum-unneeded", /* NETIF_F_HW_CSUM */ "tx-checksum-ip-generic", - /* NETIF_F_IPV6_CSUM */ "tx_checksum-ipv6", + /* NETIF_F_IPV6_CSUM */ "tx-checksum-ipv6", /* NETIF_F_HIGHDMA */ "highdma", /* NETIF_F_FRAGLIST */ "tx-scatter-gather-fraglist", /* NETIF_F_HW_VLAN_TX */ "tx-vlan-hw-insert", -- cgit v1.1 From abb57ea48fd9431fa320a5c55f73e6b5a44c2efb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 May 2011 02:21:31 -0400 Subject: net: add skb_dst_force() in sock_queue_err_skb() Commit 7fee226ad239 (add a noref bit on skb dst) forgot to use skb_dst_force() on packets queued in sk_error_queue This triggers following warning, for applications using IP_CMSG_PKTINFO receiving one error status ------------[ cut here ]------------ WARNING: at include/linux/skbuff.h:457 ip_cmsg_recv_pktinfo+0xa6/0xb0() Hardware name: 2669UYD Modules linked in: isofs vboxnetadp vboxnetflt nfsd ebtable_nat ebtables lib80211_crypt_ccmp uinput xcbc hdaps tp_smapi thinkpad_ec radeonfb fb_ddc radeon ttm drm_kms_helper drm ipw2200 intel_agp intel_gtt libipw i2c_algo_bit i2c_i801 agpgart rng_core cfbfillrect cfbcopyarea cfbimgblt video raid10 raid1 raid0 linear md_mod vboxdrv Pid: 4697, comm: miredo Not tainted 2.6.39-rc6-00569-g5895198-dirty #22 Call Trace: [] ? printk+0x1d/0x1f [] warn_slowpath_common+0x72/0xa0 [] ? ip_cmsg_recv_pktinfo+0xa6/0xb0 [] ? ip_cmsg_recv_pktinfo+0xa6/0xb0 [] warn_slowpath_null+0x20/0x30 [] ip_cmsg_recv_pktinfo+0xa6/0xb0 [] ip_cmsg_recv+0x127/0x260 [] ? skb_dequeue+0x4d/0x70 [] ? skb_copy_datagram_iovec+0x53/0x300 [] ? sub_preempt_count+0x24/0x50 [] ip_recv_error+0x23d/0x270 [] udp_recvmsg+0x264/0x2b0 [] inet_recvmsg+0xd9/0x130 [] sock_recvmsg+0xf2/0x120 [] ? might_fault+0x4b/0xa0 [] ? verify_iovec+0x4c/0xc0 [] ? sock_recvmsg_nosec+0x100/0x100 [] __sys_recvmsg+0x114/0x1e0 [] ? __lock_acquire+0x365/0x780 [] ? fget_light+0xa6/0x3e0 [] ? fget_light+0xbf/0x3e0 [] ? fget_light+0x2e/0x3e0 [] sys_recvmsg+0x39/0x60 Close bug https://bugzilla.kernel.org/show_bug.cgi?id=34622 Reported-by: Witold Baryluk Signed-off-by: Eric Dumazet CC: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/skbuff.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/core') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7ebeed0..3e934fe 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2993,6 +2993,9 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) skb->destructor = sock_rmem_free; atomic_add(skb->truesize, &sk->sk_rmem_alloc); + /* before exiting rcu section, make sure dst is refcounted */ + skb_dst_force(skb); + skb_queue_tail(&sk->sk_error_queue, skb); if (!sock_flag(sk, SOCK_DEAD)) sk->sk_data_ready(sk, skb->len); -- cgit v1.1 From 6882f933ccee5c3a86443ffc7621ce888b93ab6b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 18 May 2011 18:23:21 -0400 Subject: ipv4: Kill RT_CACHE_DEBUG It's way past it's usefulness. And this gets rid of a bunch of stray ->rt_{dst,src} references. Even the comment documenting the macro was inaccurate (stated default was 1 when it's 0). If reintroduced, it should be done properly, with dynamic debug facilities. Signed-off-by: David S. Miller --- net/core/dst.c | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'net/core') diff --git a/net/core/dst.c b/net/core/dst.c index 30f0093..da47a29 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -33,9 +33,6 @@ * 3) This list is guarded by a mutex, * so that the gc_task and dst_dev_event() can be synchronized. */ -#if RT_CACHE_DEBUG >= 2 -static atomic_t dst_total = ATOMIC_INIT(0); -#endif /* * We want to keep lock & list close together @@ -69,10 +66,6 @@ static void dst_gc_task(struct work_struct *work) unsigned long expires = ~0L; struct dst_entry *dst, *next, head; struct dst_entry *last = &head; -#if RT_CACHE_DEBUG >= 2 - ktime_t time_start = ktime_get(); - struct timespec elapsed; -#endif mutex_lock(&dst_gc_mutex); next = dst_busy_list; @@ -146,15 +139,6 @@ loop: spin_unlock_bh(&dst_garbage.lock); mutex_unlock(&dst_gc_mutex); -#if RT_CACHE_DEBUG >= 2 - elapsed = ktime_to_timespec(ktime_sub(ktime_get(), time_start)); - printk(KERN_DEBUG "dst_total: %d delayed: %d work_perf: %d" - " expires: %lu elapsed: %lu us\n", - atomic_read(&dst_total), delayed, work_performed, - expires, - elapsed.tv_sec * USEC_PER_SEC + - elapsed.tv_nsec / NSEC_PER_USEC); -#endif } int dst_discard(struct sk_buff *skb) @@ -205,9 +189,6 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst->lastuse = jiffies; dst->flags = flags; dst->next = NULL; -#if RT_CACHE_DEBUG >= 2 - atomic_inc(&dst_total); -#endif dst_entries_add(ops, 1); return dst; } @@ -267,9 +248,6 @@ again: dst->ops->destroy(dst); if (dst->dev) dev_put(dst->dev); -#if RT_CACHE_DEBUG >= 2 - atomic_dec(&dst_total); -#endif kmem_cache_free(dst->ops->kmem_cachep, dst); dst = child; -- cgit v1.1 From 449f4544267e73d5db372971da63634707c32299 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 May 2011 12:24:16 +0000 Subject: macvlan: remove one synchronize_rcu() call When one macvlan device is dismantled, we can avoid one synchronize_rcu() call done after deletion from hash list, since caller will perform a synchronize_net() call after its ndo_stop() call. Add a new netdev->dismantle field to signal this dismantle intent. Reduces RTNL hold time. Signed-off-by: Eric Dumazet CC: Patrick McHardy CC: Ben Greear Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 155de20..d945379 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5126,7 +5126,7 @@ static void rollback_registered_many(struct list_head *head) list_del(&dev->unreg_list); continue; } - + dev->dismantle = true; BUG_ON(dev->reg_state != NETREG_REGISTERED); } -- cgit v1.1