From ae593067dbed83010fee8ad59bab7948f3d3601f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 21 Feb 2013 12:18:52 +0000
Subject: ipv6: use a stronger hash for tcp

[ Upstream commit 08dcdbf6a7b9d14c2302c5bd0c5390ddf122f664 ]

It looks like its possible to open thousands of TCP IPv6
sessions on a server, all landing in a single slot of TCP hash
table. Incoming packets have to lookup sockets in a very
long list.

We should hash all bits from foreign IPv6 addresses, using
a salt and hash mix, not a simple XOR.

inet6_ehashfn() can also separately use the ports, instead
of xoring them.

Reported-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/inet6_hashtables.h |  8 ++++----
 include/net/inet_sock.h        |  1 +
 include/net/ipv6.h             | 12 ++++++++++++
 3 files changed, 17 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index e46674d..f9ce2fa 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -28,16 +28,16 @@
 
 struct inet_hashinfo;
 
-/* I have no idea if this is a good hash for v6 or not. -DaveM */
 static inline unsigned int inet6_ehashfn(struct net *net,
 				const struct in6_addr *laddr, const u16 lport,
 				const struct in6_addr *faddr, const __be16 fport)
 {
-	u32 ports = (lport ^ (__force u16)fport);
+	u32 ports = (((u32)lport) << 16) | (__force u32)fport;
 
 	return jhash_3words((__force u32)laddr->s6_addr32[3],
-			    (__force u32)faddr->s6_addr32[3],
-			    ports, inet_ehash_secret + net_hash_mix(net));
+			    ipv6_addr_jhash(faddr),
+			    ports,
+			    inet_ehash_secret + net_hash_mix(net));
 }
 
 static inline int inet6_sk_ehashfn(const struct sock *sk)
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 14dd9c7..26490b3 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -199,6 +199,7 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to,
 extern int inet_sk_rebuild_header(struct sock *sk);
 
 extern u32 inet_ehash_secret;
+extern u32 ipv6_hash_secret;
 extern void build_ehash_secret(void);
 
 static inline unsigned int inet_ehashfn(struct net *net,
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index c39121f..879aadf 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -15,6 +15,7 @@
 
 #include <linux/ipv6.h>
 #include <linux/hardirq.h>
+#include <linux/jhash.h>
 #include <net/if_inet6.h>
 #include <net/ndisc.h>
 #include <net/flow.h>
@@ -386,6 +387,17 @@ struct ip6_create_arg {
 void ip6_frag_init(struct inet_frag_queue *q, void *a);
 int ip6_frag_match(struct inet_frag_queue *q, void *a);
 
+/* more secured version of ipv6_addr_hash() */
+static inline u32 ipv6_addr_jhash(const struct in6_addr *a)
+{
+	u32 v = (__force u32)a->s6_addr32[0] ^ (__force u32)a->s6_addr32[1];
+
+	return jhash_3words(v,
+			    (__force u32)a->s6_addr32[2],
+			    (__force u32)a->s6_addr32[3],
+			    ipv6_hash_secret);
+}
+
 static inline int ipv6_addr_any(const struct in6_addr *a)
 {
 	return (a->s6_addr32[0] | a->s6_addr32[1] |
-- 
cgit v1.1


From 1b92d599fe0f704d9981063d39339fea1f9bd092 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Wed, 13 Mar 2013 00:24:15 +0000
Subject: ipv4: fix definition of FIB_TABLE_HASHSZ

[ Upstream commit 5b9e12dbf92b441b37136ea71dac59f05f2673a9 ]

a long time ago by the commit

  commit 93456b6d7753def8760b423ac6b986eb9d5a4a95
  Author: Denis V. Lunev <den@openvz.org>
  Date:   Thu Jan 10 03:23:38 2008 -0800

    [IPV4]: Unify access to the routing tables.

the defenition of FIB_HASH_TABLE size has obtained wrong dependency:
it should depend upon CONFIG_IP_MULTIPLE_TABLES (as was in the original
code) but it was depended from CONFIG_IP_ROUTE_MULTIPATH

This patch returns the situation to the original state.

The problem was spotted by Tingwei Liu.

Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Tingwei Liu <tingw.liu@gmail.com>
CC: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/ip_fib.h | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 10422ef..2124004 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -129,18 +129,16 @@ struct fib_result_nl {
 };
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-
 #define FIB_RES_NH(res)		((res).fi->fib_nh[(res).nh_sel])
-
-#define FIB_TABLE_HASHSZ 2
-
 #else /* CONFIG_IP_ROUTE_MULTIPATH */
-
 #define FIB_RES_NH(res)		((res).fi->fib_nh[0])
+#endif /* CONFIG_IP_ROUTE_MULTIPATH */
 
+#ifdef CONFIG_IP_MULTIPLE_TABLES
 #define FIB_TABLE_HASHSZ 256
-
-#endif /* CONFIG_IP_ROUTE_MULTIPATH */
+#else
+#define FIB_TABLE_HASHSZ 2
+#endif
 
 extern __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh);
 
-- 
cgit v1.1


From 7b7a1b8b3bd1742ca5ab259e741da0070e936db0 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Fri, 15 Mar 2013 11:32:30 +0000
Subject: inet: limit length of fragment queue hash table bucket lists

[ Upstream commit 5a3da1fe9561828d0ca7eca664b16ec2b9bf0055 ]

This patch introduces a constant limit of the fragment queue hash
table bucket list lengths. Currently the limit 128 is choosen somewhat
arbitrary and just ensures that we can fill up the fragment cache with
empty packets up to the default ip_frag_high_thresh limits. It should
just protect from list iteration eating considerable amounts of cpu.

If we reach the maximum length in one hash bucket a warning is printed.
This is implemented on the caller side of inet_frag_find to distinguish
between the different users of inet_fragment.c.

I dropped the out of memory warning in the ipv4 fragment lookup path,
because we already get a warning by the slab allocator.

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Jesper Dangaard Brouer <jbrouer@redhat.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/inet_frag.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/net')

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 16ff29a..b289bd2 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -33,6 +33,13 @@ struct inet_frag_queue {
 
 #define INETFRAGS_HASHSZ		64
 
+/* averaged:
+ * max_depth = default ipfrag_high_thresh / INETFRAGS_HASHSZ /
+ *	       rounded up (SKB_TRUELEN(0) + sizeof(struct ipq or
+ *	       struct frag_queue))
+ */
+#define INETFRAGS_MAXDEPTH		128
+
 struct inet_frags {
 	struct hlist_head	hash[INETFRAGS_HASHSZ];
 	rwlock_t		lock;
@@ -64,6 +71,8 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f);
 struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
 		struct inet_frags *f, void *key, unsigned int hash)
 	__releases(&f->lock);
+void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
+				   const char *prefix);
 
 static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f)
 {
-- 
cgit v1.1


From 73d2de1ad017f674ec21e57405e47028dbc884bf Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 19 Apr 2013 15:32:32 +0000
Subject: net: fix incorrect credentials passing

[ Upstream commit 83f1b4ba917db5dc5a061a44b3403ddb6e783494 ]

Commit 257b5358b32f ("scm: Capture the full credentials of the scm
sender") changed the credentials passing code to pass in the effective
uid/gid instead of the real uid/gid.

Obviously this doesn't matter most of the time (since normally they are
the same), but it results in differences for suid binaries when the wrong
uid/gid ends up being used.

This just undoes that (presumably unintentional) part of the commit.

Reported-by: Andy Lutomirski <luto@amacapital.net>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Serge E. Hallyn <serge@hallyn.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/scm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/scm.h b/include/net/scm.h
index 745460f..820c1b3 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -50,7 +50,7 @@ static __inline__ void scm_set_cred(struct scm_cookie *scm,
 {
 	scm->pid  = get_pid(pid);
 	scm->cred = get_cred(cred);
-	cred_to_ucred(pid, cred, &scm->creds);
+	cred_to_ucred(pid, cred, &scm->creds, false);
 }
 
 static __inline__ void scm_destroy_cred(struct scm_cookie *scm)
-- 
cgit v1.1


From a0464b18acc3f17ff86aa25df34c5066a21c8291 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 24 Apr 2013 18:34:55 -0700
Subject: tcp: force a dst refcount when prequeue packet

[ Upstream commit 093162553c33e9479283e107b4431378271c735d ]

Before escaping RCU protected section and adding packet into
prequeue, make sure the dst is refcounted.

Reported-by: Mike Galbraith <bitbucket@online.de>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/tcp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index b28a49f..4881cb6 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -902,6 +902,7 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
 	if (sysctl_tcp_low_latency || !tp->ucopy.task)
 		return 0;
 
+	skb_dst_force(skb);
 	__skb_queue_tail(&tp->ucopy.prequeue, skb);
 	tp->ucopy.memory += skb->truesize;
 	if (tp->ucopy.memory > sk->sk_rcvbuf) {
-- 
cgit v1.1


From 1e74f2ea952f201c5ee5edce74daab21aea89b31 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 9 May 2013 10:28:16 +0000
Subject: ipv6: do not clear pinet6 field

[ Upstream commit f77d602124d865c38705df7fa25c03de9c284ad2 ]

We have seen multiple NULL dereferences in __inet6_lookup_established()

After analysis, I found that inet6_sk() could be NULL while the
check for sk_family == AF_INET6 was true.

Bug was added in linux-2.6.29 when RCU lookups were introduced in UDP
and TCP stacks.

Once an IPv6 socket, using SLAB_DESTROY_BY_RCU is inserted in a hash
table, we no longer can clear pinet6 field.

This patch extends logic used in commit fcbdf09d9652c891
("net: fix nulls list corruptions in sk_prot_alloc")

TCP/UDP/UDPLite IPv6 protocols provide their own .clear_sk() method
to make sure we do not clear pinet6 field.

At socket clone phase, we do not really care, as cloning the parent (non
NULL) pinet6 is not adding a fatal race.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/sock.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index b2deeab..b6abd4f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -721,6 +721,18 @@ struct timewait_sock_ops;
 struct inet_hashinfo;
 struct raw_hashinfo;
 
+/*
+ * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes
+ * un-modified. Special care is taken when initializing object to zero.
+ */
+static inline void sk_prot_clear_nulls(struct sock *sk, int size)
+{
+	if (offsetof(struct sock, sk_node.next) != 0)
+		memset(sk, 0, offsetof(struct sock, sk_node.next));
+	memset(&sk->sk_node.pprev, 0,
+	       size - offsetof(struct sock, sk_node.pprev));
+}
+
 /* Networking protocol blocks we attach to sockets.
  * socket layer -> transport layer interface
  * transport -> network interface is defined by struct inet_proto
-- 
cgit v1.1


From 52ef39eeff06aecc56266902bba6bf28891cabd3 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Sat, 29 Jun 2013 21:30:49 +0800
Subject: ipv6,mcast: always hold idev->lock before mca_lock

[ Upstream commit 8965779d2c0e6ab246c82a405236b1fb2adae6b2, with
  some bits from commit b7b1bfce0bb68bd8f6e62a28295922785cc63781
  ("ipv6: split duplicate address detection and router solicitation timer")
  to get the __ipv6_get_lladdr() used by this patch. ]

dingtianhong reported the following deadlock detected by lockdep:

 ======================================================
 [ INFO: possible circular locking dependency detected ]
 3.4.24.05-0.1-default #1 Not tainted
 -------------------------------------------------------
 ksoftirqd/0/3 is trying to acquire lock:
  (&ndev->lock){+.+...}, at: [<ffffffff8147f804>] ipv6_get_lladdr+0x74/0x120

 but task is already holding lock:
  (&mc->mca_lock){+.+...}, at: [<ffffffff8149d130>] mld_send_report+0x40/0x150

 which lock already depends on the new lock.

 the existing dependency chain (in reverse order) is:

 -> #1 (&mc->mca_lock){+.+...}:
        [<ffffffff810a8027>] validate_chain+0x637/0x730
        [<ffffffff810a8417>] __lock_acquire+0x2f7/0x500
        [<ffffffff810a8734>] lock_acquire+0x114/0x150
        [<ffffffff814f691a>] rt_spin_lock+0x4a/0x60
        [<ffffffff8149e4bb>] igmp6_group_added+0x3b/0x120
        [<ffffffff8149e5d8>] ipv6_mc_up+0x38/0x60
        [<ffffffff81480a4d>] ipv6_find_idev+0x3d/0x80
        [<ffffffff81483175>] addrconf_notify+0x3d5/0x4b0
        [<ffffffff814fae3f>] notifier_call_chain+0x3f/0x80
        [<ffffffff81073471>] raw_notifier_call_chain+0x11/0x20
        [<ffffffff813d8722>] call_netdevice_notifiers+0x32/0x60
        [<ffffffff813d92d4>] __dev_notify_flags+0x34/0x80
        [<ffffffff813d9360>] dev_change_flags+0x40/0x70
        [<ffffffff813ea627>] do_setlink+0x237/0x8a0
        [<ffffffff813ebb6c>] rtnl_newlink+0x3ec/0x600
        [<ffffffff813eb4d0>] rtnetlink_rcv_msg+0x160/0x310
        [<ffffffff814040b9>] netlink_rcv_skb+0x89/0xb0
        [<ffffffff813eb357>] rtnetlink_rcv+0x27/0x40
        [<ffffffff81403e20>] netlink_unicast+0x140/0x180
        [<ffffffff81404a9e>] netlink_sendmsg+0x33e/0x380
        [<ffffffff813c4252>] sock_sendmsg+0x112/0x130
        [<ffffffff813c537e>] __sys_sendmsg+0x44e/0x460
        [<ffffffff813c5544>] sys_sendmsg+0x44/0x70
        [<ffffffff814feab9>] system_call_fastpath+0x16/0x1b

 -> #0 (&ndev->lock){+.+...}:
        [<ffffffff810a798e>] check_prev_add+0x3de/0x440
        [<ffffffff810a8027>] validate_chain+0x637/0x730
        [<ffffffff810a8417>] __lock_acquire+0x2f7/0x500
        [<ffffffff810a8734>] lock_acquire+0x114/0x150
        [<ffffffff814f6c82>] rt_read_lock+0x42/0x60
        [<ffffffff8147f804>] ipv6_get_lladdr+0x74/0x120
        [<ffffffff8149b036>] mld_newpack+0xb6/0x160
        [<ffffffff8149b18b>] add_grhead+0xab/0xc0
        [<ffffffff8149d03b>] add_grec+0x3ab/0x460
        [<ffffffff8149d14a>] mld_send_report+0x5a/0x150
        [<ffffffff8149f99e>] igmp6_timer_handler+0x4e/0xb0
        [<ffffffff8105705a>] call_timer_fn+0xca/0x1d0
        [<ffffffff81057b9f>] run_timer_softirq+0x1df/0x2e0
        [<ffffffff8104e8c7>] handle_pending_softirqs+0xf7/0x1f0
        [<ffffffff8104ea3b>] __do_softirq_common+0x7b/0xf0
        [<ffffffff8104f07f>] __thread_do_softirq+0x1af/0x210
        [<ffffffff8104f1c1>] run_ksoftirqd+0xe1/0x1f0
        [<ffffffff8106c7de>] kthread+0xae/0xc0
        [<ffffffff814fff74>] kernel_thread_helper+0x4/0x10

actually we can just hold idev->lock before taking pmc->mca_lock,
and avoid taking idev->lock again when iterating idev->addr_list,
since the upper callers of mld_newpack() already take
read_lock_bh(&idev->lock).

Reported-by: dingtianhong <dingtianhong@huawei.com>
Cc: dingtianhong <dingtianhong@huawei.com>
Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Tested-by: Ding Tianhong <dingtianhong@huawei.com>
Tested-by: Chen Weilong <chenweilong@huawei.com>
Signed-off-by: Cong Wang <amwang@redhat.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/addrconf.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/net')

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 582e4ae..561fd2a 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -81,6 +81,9 @@ extern int			ipv6_dev_get_saddr(struct net *net,
 					       const struct in6_addr *daddr,
 					       unsigned int srcprefs,
 					       struct in6_addr *saddr);
+extern int			__ipv6_get_lladdr(struct inet6_dev *idev,
+						  struct in6_addr *addr,
+						  unsigned char banned_flags);
 extern int			ipv6_get_lladdr(struct net_device *dev,
 						struct in6_addr *addr,
 						unsigned char banned_flags);
-- 
cgit v1.1


From 639e5920a9ae14b1eefc44a8740f5d0f816adb9a Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Mon, 1 Jul 2013 20:21:30 +0200
Subject: ipv6: call udp_push_pending_frames when uncorking a socket with
 AF_INET pending data

[ Upstream commit 8822b64a0fa64a5dd1dfcf837c5b0be83f8c05d1 ]

We accidentally call down to ip6_push_pending_frames when uncorking
pending AF_INET data on a ipv6 socket. This results in the following
splat (from Dave Jones):

skbuff: skb_under_panic: text:ffffffff816765f6 len:48 put:40 head:ffff88013deb6df0 data:ffff88013deb6dec tail:0x2c end:0xc0 dev:<NULL>
------------[ cut here ]------------
kernel BUG at net/core/skbuff.c:126!
invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
Modules linked in: dccp_ipv4 dccp 8021q garp bridge stp dlci mpoa snd_seq_dummy sctp fuse hidp tun bnep nfnetlink scsi_transport_iscsi rfcomm can_raw can_bcm af_802154 appletalk caif_socket can caif ipt_ULOG x25 rose af_key pppoe pppox ipx phonet irda llc2 ppp_generic slhc p8023 psnap p8022 llc crc_ccitt atm bluetooth
+netrom ax25 nfc rfkill rds af_rxrpc coretemp hwmon kvm_intel kvm crc32c_intel snd_hda_codec_realtek ghash_clmulni_intel microcode pcspkr snd_hda_codec_hdmi snd_hda_intel snd_hda_codec snd_hwdep usb_debug snd_seq snd_seq_device snd_pcm e1000e snd_page_alloc snd_timer ptp snd pps_core soundcore xfs libcrc32c
CPU: 2 PID: 8095 Comm: trinity-child2 Not tainted 3.10.0-rc7+ #37
task: ffff8801f52c2520 ti: ffff8801e6430000 task.ti: ffff8801e6430000
RIP: 0010:[<ffffffff816e759c>]  [<ffffffff816e759c>] skb_panic+0x63/0x65
RSP: 0018:ffff8801e6431de8  EFLAGS: 00010282
RAX: 0000000000000086 RBX: ffff8802353d3cc0 RCX: 0000000000000006
RDX: 0000000000003b90 RSI: ffff8801f52c2ca0 RDI: ffff8801f52c2520
RBP: ffff8801e6431e08 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000001 R11: 0000000000000001 R12: ffff88022ea0c800
R13: ffff88022ea0cdf8 R14: ffff8802353ecb40 R15: ffffffff81cc7800
FS:  00007f5720a10740(0000) GS:ffff880244c00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000005862000 CR3: 000000022843c000 CR4: 00000000001407e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000600
Stack:
 ffff88013deb6dec 000000000000002c 00000000000000c0 ffffffff81a3f6e4
 ffff8801e6431e18 ffffffff8159a9aa ffff8801e6431e90 ffffffff816765f6
 ffffffff810b756b 0000000700000002 ffff8801e6431e40 0000fea9292aa8c0
Call Trace:
 [<ffffffff8159a9aa>] skb_push+0x3a/0x40
 [<ffffffff816765f6>] ip6_push_pending_frames+0x1f6/0x4d0
 [<ffffffff810b756b>] ? mark_held_locks+0xbb/0x140
 [<ffffffff81694919>] udp_v6_push_pending_frames+0x2b9/0x3d0
 [<ffffffff81694660>] ? udplite_getfrag+0x20/0x20
 [<ffffffff8162092a>] udp_lib_setsockopt+0x1aa/0x1f0
 [<ffffffff811cc5e7>] ? fget_light+0x387/0x4f0
 [<ffffffff816958a4>] udpv6_setsockopt+0x34/0x40
 [<ffffffff815949f4>] sock_common_setsockopt+0x14/0x20
 [<ffffffff81593c31>] SyS_setsockopt+0x71/0xd0
 [<ffffffff816f5d54>] tracesys+0xdd/0xe2
Code: 00 00 48 89 44 24 10 8b 87 d8 00 00 00 48 89 44 24 08 48 8b 87 e8 00 00 00 48 c7 c7 c0 04 aa 81 48 89 04 24 31 c0 e8 e1 7e ff ff <0f> 0b 55 48 89 e5 0f 0b 55 48 89 e5 0f 0b 55 48 89 e5 0f 0b 55
RIP  [<ffffffff816e759c>] skb_panic+0x63/0x65
 RSP <ffff8801e6431de8>

This patch adds a check if the pending data is of address family AF_INET
and directly calls udp_push_ending_frames from udp_v6_push_pending_frames
if that is the case.

This bug was found by Dave Jones with trinity.

(Also move the initialization of fl6 below the AF_INET check, even if
not strictly necessary.)

Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: Dave Jones <davej@redhat.com>
Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/udp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/udp.h b/include/net/udp.h
index 67ea6fc..e723c9d 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -180,6 +180,7 @@ extern int udp_get_port(struct sock *sk, unsigned short snum,
 extern void udp_err(struct sk_buff *, u32);
 extern int udp_sendmsg(struct kiocb *iocb, struct sock *sk,
 			    struct msghdr *msg, size_t len);
+extern int udp_push_pending_frames(struct sock *sk);
 extern void udp_flush_pending_frames(struct sock *sk);
 extern int udp_rcv(struct sk_buff *skb);
 extern int udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
-- 
cgit v1.1


From 2a6a2791b1e6ebd7ad29f137a309471f92d71c55 Mon Sep 17 00:00:00 2001
From: Ansis Atteka <aatteka@nicira.com>
Date: Wed, 18 Sep 2013 15:29:53 -0700
Subject: ip: generate unique IP identificator if local fragmentation is
 allowed

[ Upstream commit 703133de331a7a7df47f31fb9de51dc6f68a9de8 ]

If local fragmentation is allowed, then ip_select_ident() and
ip_select_ident_more() need to generate unique IDs to ensure
correct defragmentation on the peer.

For example, if IPsec (tunnel mode) has to encrypt large skbs
that have local_df bit set, then all IP fragments that belonged
to different ESP datagrams would have used the same identificator.
If one of these IP fragments would get lost or reordered, then
peer could possibly stitch together wrong IP fragments that did
not belong to the same datagram. This would lead to a packet loss
or data corruption.

Signed-off-by: Ansis Atteka <aatteka@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/ip.h   | 12 ++++++++----
 include/net/ipip.h |  2 +-
 2 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ip.h b/include/net/ip.h
index 66dd491..2370f47 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -262,9 +262,11 @@ int ip_dont_fragment(struct sock *sk, struct dst_entry *dst)
 
 extern void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more);
 
-static inline void ip_select_ident(struct iphdr *iph, struct dst_entry *dst, struct sock *sk)
+static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk)
 {
-	if (iph->frag_off & htons(IP_DF)) {
+	struct iphdr *iph = ip_hdr(skb);
+
+	if ((iph->frag_off & htons(IP_DF)) && !skb->local_df) {
 		/* This is only to work around buggy Windows95/2000
 		 * VJ compression implementations.  If the ID field
 		 * does not change, they drop every other packet in
@@ -276,9 +278,11 @@ static inline void ip_select_ident(struct iphdr *iph, struct dst_entry *dst, str
 		__ip_select_ident(iph, dst, 0);
 }
 
-static inline void ip_select_ident_more(struct iphdr *iph, struct dst_entry *dst, struct sock *sk, int more)
+static inline void ip_select_ident_more(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk, int more)
 {
-	if (iph->frag_off & htons(IP_DF)) {
+	struct iphdr *iph = ip_hdr(skb);
+
+	if ((iph->frag_off & htons(IP_DF)) && !skb->local_df) {
 		if (sk && inet_sk(sk)->inet_daddr) {
 			iph->id = htons(inet_sk(sk)->inet_id);
 			inet_sk(sk)->inet_id += 1 + more;
diff --git a/include/net/ipip.h b/include/net/ipip.h
index a32654d..4dccfe3 100644
--- a/include/net/ipip.h
+++ b/include/net/ipip.h
@@ -50,7 +50,7 @@ struct ip_tunnel_prl_entry {
 	int pkt_len = skb->len - skb_transport_offset(skb);		\
 									\
 	skb->ip_summed = CHECKSUM_NONE;					\
-	ip_select_ident(iph, &rt->dst, NULL);				\
+	ip_select_ident(skb, &rt->dst, NULL);				\
 									\
 	err = ip_local_out(skb);					\
 	if (likely(net_xmit_eval(err) == 0)) {				\
-- 
cgit v1.1