From dd58ddc6928f711d8fb7101182215a0f23cf41f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 25 Jan 2011 21:56:16 +0000 Subject: batman-adv: Fix kernel panic when fetching vis data on a vis server MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hash_iterate removal introduced a bug leading to a kernel panic when fetching the vis data on a vis server. That commit forgot to rename one variable name, which this commit fixes now. Reported-by: Russell Senior Signed-off-by: Linus Lüssing Signed-off-by: Sven Eckelmann --- net/batman-adv/vis.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index cd4c423..f69a374 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -268,10 +268,10 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) buff_pos += sprintf(buff + buff_pos, "%pM,", entry->addr); - for (i = 0; i < packet->entries; i++) + for (j = 0; j < packet->entries; j++) buff_pos += vis_data_read_entry( buff + buff_pos, - &entries[i], + &entries[j], entry->addr, entry->primary); -- cgit v1.1 From 66c46d741e2e60f0e8b625b80edb0ab820c46d7a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 29 Jan 2011 20:44:54 -0800 Subject: gro: Reset dev pointer on reuse On older kernels the VLAN code may zero skb->dev before dropping it and causing it to be reused by GRO. Unfortunately we didn't reset skb->dev in that case which causes the next GRO user to get a bogus skb->dev pointer. This particular problem no longer happens with the current upstream kernel due to changes in VLAN processing. However, for correctness we should still reset the skb->dev pointer in the GRO reuse function in case a future user does the same thing. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 24ea2d7..93e44db 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3424,6 +3424,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) __skb_pull(skb, skb_headlen(skb)); skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); skb->vlan_tci = 0; + skb->dev = napi->dev; napi->skb = skb; } -- cgit v1.1 From 13ad17745c2cbd437d9e24b2d97393e0be11c439 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 29 Jan 2011 14:57:22 +0000 Subject: net: Fix ip link add netns oops Ed Swierk writes: > On 2.6.35.7 > ip link add link eth0 netns 9999 type macvlan > where 9999 is a nonexistent PID triggers an oops and causes all network functions to hang: > [10663.821898] BUG: unable to handle kernel NULL pointer dereference at 000000000000006d > [10663.821917] IP: [] __dev_alloc_name+0x9a/0x170 > [10663.821933] PGD 1d3927067 PUD 22f5c5067 PMD 0 > [10663.821944] Oops: 0000 [#1] SMP > [10663.821953] last sysfs file: /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq > [10663.821959] CPU 3 > [10663.821963] Modules linked in: macvlan ip6table_filter ip6_tables rfcomm ipt_MASQUERADE binfmt_misc iptable_nat nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack sco ipt_REJECT bnep l2cap xt_tcpudp iptable_filter ip_tables x_tables bridge stp vboxnetadp vboxnetflt vboxdrv kvm_intel kvm parport_pc ppdev snd_hda_codec_intelhdmi snd_hda_codec_conexant arc4 iwlagn iwlcore mac80211 snd_hda_intel snd_hda_codec snd_hwdep snd_pcm snd_seq_midi snd_rawmidi i915 snd_seq_midi_event snd_seq thinkpad_acpi drm_kms_helper btusb tpm_tis nvram uvcvideo snd_timer snd_seq_device bluetooth videodev v4l1_compat v4l2_compat_ioctl32 tpm drm tpm_bios snd cfg80211 psmouse serio_raw intel_ips soundcore snd_page_alloc intel_agp i2c_algo_bit video output netconsole configfs lp parport usbhid hid e1000e sdhci_pci ahci libahci sdhci led_class > [10663.822155] > [10663.822161] Pid: 6000, comm: ip Not tainted 2.6.35-23-generic #41-Ubuntu 2901CTO/2901CTO > [10663.822167] RIP: 0010:[] [] __dev_alloc_name+0x9a/0x170 > [10663.822177] RSP: 0018:ffff88014aebf7b8 EFLAGS: 00010286 > [10663.822182] RAX: 00000000fffffff4 RBX: ffff8801ad900800 RCX: 0000000000000000 > [10663.822187] RDX: ffff880000000000 RSI: 0000000000000000 RDI: ffff88014ad63000 > [10663.822191] RBP: ffff88014aebf808 R08: 0000000000000041 R09: 0000000000000041 > [10663.822196] R10: 0000000000000000 R11: dead000000200200 R12: ffff88014aebf818 > [10663.822201] R13: fffffffffffffffd R14: ffff88014aebf918 R15: ffff88014ad62000 > [10663.822207] FS: 00007f00c487f700(0000) GS:ffff880001f80000(0000) knlGS:0000000000000000 > [10663.822212] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [10663.822216] CR2: 000000000000006d CR3: 0000000231f19000 CR4: 00000000000026e0 > [10663.822221] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > [10663.822226] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 > [10663.822231] Process ip (pid: 6000, threadinfo ffff88014aebe000, task ffff88014afb16e0) > [10663.822236] Stack: > [10663.822240] ffff88014aebf808 ffffffff814a2bb5 ffff88014aebf7e8 00000000a00ee8d6 > [10663.822251] <0> 0000000000000000 ffffffffa00ef940 ffff8801ad900800 ffff88014aebf818 > [10663.822265] <0> ffff88014aebf918 ffff8801ad900800 ffff88014aebf858 ffffffff8149c413 > [10663.822281] Call Trace: > [10663.822290] [] ? dev_addr_init+0x75/0xb0 > [10663.822298] [] dev_alloc_name+0x43/0x90 > [10663.822307] [] rtnl_create_link+0xbe/0x1b0 > [10663.822314] [] rtnl_newlink+0x48a/0x570 > [10663.822321] [] ? rtnl_newlink+0x1ac/0x570 > [10663.822332] [] ? native_x2apic_icr_read+0x4/0x20 > [10663.822339] [] rtnetlink_rcv_msg+0x177/0x290 > [10663.822346] [] ? rtnetlink_rcv_msg+0x0/0x290 > [10663.822354] [] netlink_rcv_skb+0xa9/0xd0 > [10663.822360] [] rtnetlink_rcv+0x25/0x40 > [10663.822367] [] netlink_unicast+0x2de/0x2f0 > [10663.822374] [] netlink_sendmsg+0x1fe/0x2e0 > [10663.822383] [] sock_sendmsg+0xf3/0x120 > [10663.822391] [] ? _raw_spin_lock+0xe/0x20 > [10663.822400] [] ? __d_lookup+0x136/0x150 > [10663.822406] [] ? _raw_spin_lock+0xe/0x20 > [10663.822414] [] ? _atomic_dec_and_lock+0x4d/0x80 > [10663.822422] [] ? mntput_no_expire+0x30/0x110 > [10663.822429] [] ? move_addr_to_kernel+0x65/0x70 > [10663.822435] [] ? verify_iovec+0x88/0xe0 > [10663.822442] [] sys_sendmsg+0x240/0x3a0 > [10663.822450] [] ? __do_fault+0x479/0x560 > [10663.822457] [] ? _raw_spin_lock+0xe/0x20 > [10663.822465] [] ? alloc_fd+0x10a/0x150 > [10663.822473] [] ? do_page_fault+0x15e/0x350 > [10663.822482] [] system_call_fastpath+0x16/0x1b > [10663.822487] Code: 90 48 8d 78 02 be 25 00 00 00 e8 92 1d e2 ff 48 85 c0 75 cf bf 20 00 00 00 e8 c3 b1 c6 ff 49 89 c7 b8 f4 ff ff ff 4d 85 ff 74 bd <4d> 8b 75 70 49 8d 45 70 48 89 45 b8 49 83 ee 58 eb 28 48 8d 55 > [10663.822618] RIP [] __dev_alloc_name+0x9a/0x170 > [10663.822627] RSP > [10663.822631] CR2: 000000000000006d > [10663.822636] ---[ end trace 3dfd6c3ad5327ca7 ]--- This bug was introduced in: commit 81adee47dfb608df3ad0b91d230fb3cef75f0060 Author: Eric W. Biederman Date: Sun Nov 8 00:53:51 2009 -0800 net: Support specifying the network namespace upon device creation. There is no good reason to not support userspace specifying the network namespace during device creation, and it makes it easier to create a network device and pass it to a child network namespace with a well known name. We have to be careful to ensure that the target network namespace for the new device exists through the life of the call. To keep that logic clear I have factored out the network namespace grabbing logic into rtnl_link_get_net. In addtion we need to continue to pass the source network namespace to the rtnl_link_ops.newlink method so that we can find the base device source network namespace. Signed-off-by: Eric W. Biederman Acked-by: Eric Dumazet Where apparently I forgot to add error handling to the path where we create a new network device in a new network namespace, and pass in an invalid pid. Cc: stable@kernel.org Reported-by: Ed Swierk Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 31459ef..2d65c6b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1671,6 +1671,9 @@ replay: snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); dest_net = rtnl_link_get_net(net, tb); + if (IS_ERR(dest_net)) + return PTR_ERR(dest_net); + dev = rtnl_create_link(net, dest_net, ifname, ops, tb); if (IS_ERR(dev)) -- cgit v1.1 From 709b46e8d90badda1898caea50483c12af178e96 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 29 Jan 2011 16:15:56 +0000 Subject: net: Add compat ioctl support for the ipv4 multicast ioctl SIOCGETSGCNT SIOCGETSGCNT is not a unique ioctl value as it it maps tio SIOCPROTOPRIVATE +1, which unfortunately means the existing infrastructure for compat networking ioctls is insufficient. A trivial compact ioctl implementation would conflict with: SIOCAX25ADDUID SIOCAIPXPRISLT SIOCGETSGCNT_IN6 SIOCGETSGCNT SIOCRSSCAUSE SIOCX25SSUBSCRIP SIOCX25SDTEFACILITIES To make this work I have updated the compat_ioctl decode path to mirror the the normal ioctl decode path. I have added an ipv4 inet_compat_ioctl function so that I can have ipv4 specific compat ioctls. I have added a compat_ioctl function into struct proto so I can break out ioctls by which kind of ip socket I am using. I have added a compat_raw_ioctl function because SIOCGETSGCNT only works on raw sockets. I have added a ipmr_compat_ioctl that mirrors the normal ipmr_ioctl. This was necessary because unfortunately the struct layout for the SIOCGETSGCNT has unsigned longs in it so changes between 32bit and 64bit kernels. This change was sufficient to run a 32bit ip multicast routing daemon on a 64bit kernel. Reported-by: Bill Fenner Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 16 ++++++++++++++++ net/ipv4/ipmr.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/raw.c | 19 +++++++++++++++++++ 3 files changed, 81 insertions(+) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f2b6110..45b89d7 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -880,6 +880,19 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } EXPORT_SYMBOL(inet_ioctl); +#ifdef CONFIG_COMPAT +int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + struct sock *sk = sock->sk; + int err = -ENOIOCTLCMD; + + if (sk->sk_prot->compat_ioctl) + err = sk->sk_prot->compat_ioctl(sk, cmd, arg); + + return err; +} +#endif + const struct proto_ops inet_stream_ops = { .family = PF_INET, .owner = THIS_MODULE, @@ -903,6 +916,7 @@ const struct proto_ops inet_stream_ops = { #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, + .compat_ioctl = inet_compat_ioctl, #endif }; EXPORT_SYMBOL(inet_stream_ops); @@ -929,6 +943,7 @@ const struct proto_ops inet_dgram_ops = { #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, + .compat_ioctl = inet_compat_ioctl, #endif }; EXPORT_SYMBOL(inet_dgram_ops); @@ -959,6 +974,7 @@ static const struct proto_ops inet_sockraw_ops = { #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, + .compat_ioctl = inet_compat_ioctl, #endif }; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 3f3a9af..7e41ac0 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include #include @@ -1434,6 +1435,51 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) } } +#ifdef CONFIG_COMPAT +struct compat_sioc_sg_req { + struct in_addr src; + struct in_addr grp; + compat_ulong_t pktcnt; + compat_ulong_t bytecnt; + compat_ulong_t wrong_if; +}; + +int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) +{ + struct sioc_sg_req sr; + struct mfc_cache *c; + struct net *net = sock_net(sk); + struct mr_table *mrt; + + mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); + if (mrt == NULL) + return -ENOENT; + + switch (cmd) { + case SIOCGETSGCNT: + if (copy_from_user(&sr, arg, sizeof(sr))) + return -EFAULT; + + rcu_read_lock(); + c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); + if (c) { + sr.pktcnt = c->mfc_un.res.pkt; + sr.bytecnt = c->mfc_un.res.bytes; + sr.wrong_if = c->mfc_un.res.wrong_if; + rcu_read_unlock(); + + if (copy_to_user(arg, &sr, sizeof(sr))) + return -EFAULT; + return 0; + } + rcu_read_unlock(); + return -EADDRNOTAVAIL; + default: + return -ENOIOCTLCMD; + } +} +#endif + static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index a3d5ab7..6390ba2 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -76,6 +76,7 @@ #include #include #include +#include static struct raw_hashinfo raw_v4_hashinfo = { .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock), @@ -838,6 +839,23 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) } } +#ifdef CONFIG_COMPAT +static int compat_raw_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case SIOCOUTQ: + case SIOCINQ: + return -ENOIOCTLCMD; + default: +#ifdef CONFIG_IP_MROUTE + return ipmr_compat_ioctl(sk, cmd, compat_ptr(arg)); +#else + return -ENOIOCTLCMD; +#endif + } +} +#endif + struct proto raw_prot = { .name = "RAW", .owner = THIS_MODULE, @@ -860,6 +878,7 @@ struct proto raw_prot = { #ifdef CONFIG_COMPAT .compat_setsockopt = compat_raw_setsockopt, .compat_getsockopt = compat_raw_getsockopt, + .compat_ioctl = compat_raw_ioctl, #endif }; -- cgit v1.1 From 2674c15870f888cb732a564fc504ce17654afc64 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 28 Jan 2011 18:34:05 +0100 Subject: batman-adv: Remove vis info on hashing errors A newly created vis info object must be removed when it couldn't be added to the hash. The old_info which has to be replaced was already removed and isn't related to the hash anymore. Signed-off-by: Sven Eckelmann --- net/batman-adv/vis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index f69a374..0be55be 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -444,7 +444,7 @@ static struct vis_info *add_packet(struct bat_priv *bat_priv, info); if (hash_added < 0) { /* did not work (for some reason) */ - kref_put(&old_info->refcount, free_info); + kref_put(&info->refcount, free_info); info = NULL; } -- cgit v1.1 From dda9fc6b2c59f056e7a2b313b8423b14a4df25a9 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 28 Jan 2011 18:34:06 +0100 Subject: batman-adv: Remove vis info element in free_info The free_info function will be called when no reference to the info object exists anymore. It must be ensured that the allocated memory gets freed and not only the elements which are managed by the info object. Signed-off-by: Sven Eckelmann --- net/batman-adv/vis.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 0be55be..988296c 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -64,6 +64,7 @@ static void free_info(struct kref *ref) spin_unlock_bh(&bat_priv->vis_list_lock); kfree_skb(info->skb_packet); + kfree(info); } /* Compare two vis packets, used by the hashing algorithm */ -- cgit v1.1 From 1181e1daace88018b2ff66592aa10a4791d705ff Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 28 Jan 2011 18:34:07 +0100 Subject: batman-adv: Make vis info stack traversal threadsafe The batman-adv vis server has to a stack which stores all information about packets which should be send later. This stack is protected with a spinlock that is used to prevent concurrent write access to it. The send_vis_packets function has to take all elements from the stack and send them to other hosts over the primary interface. The send will be initiated without the lock which protects the stack. The implementation using list_for_each_entry_safe has the problem that it stores the next element as "safe ptr" to allow the deletion of the current element in the list. The list may be modified during the unlock/lock pair in the loop body which may make the safe pointer not pointing to correct next element. It is safer to remove and use the first element from the stack until no elements are available. This does not need reduntant information which would have to be validated each time the lock was removed. Reported-by: Russell Senior Signed-off-by: Sven Eckelmann --- net/batman-adv/vis.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 988296c..de1022c 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -816,7 +816,7 @@ static void send_vis_packets(struct work_struct *work) container_of(work, struct delayed_work, work); struct bat_priv *bat_priv = container_of(delayed_work, struct bat_priv, vis_work); - struct vis_info *info, *temp; + struct vis_info *info; spin_lock_bh(&bat_priv->vis_hash_lock); purge_vis_packets(bat_priv); @@ -826,8 +826,9 @@ static void send_vis_packets(struct work_struct *work) send_list_add(bat_priv, bat_priv->my_vis_info); } - list_for_each_entry_safe(info, temp, &bat_priv->vis_send_list, - send_list) { + while (!list_empty(&bat_priv->vis_send_list)) { + info = list_first_entry(&bat_priv->vis_send_list, + typeof(*info), send_list); kref_get(&info->refcount); spin_unlock_bh(&bat_priv->vis_hash_lock); -- cgit v1.1