diff options
-rw-r--r-- | include/net/tcp.h | 193 | ||||
-rw-r--r-- | net/ipv4/tcp_cong.c | 28 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 82 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 9 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 87 |
5 files changed, 198 insertions, 201 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h index 3699304..77f21c6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -445,34 +445,16 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); -/* Initialize RCV_MSS value. - * RCV_MSS is an our guess about MSS used by the peer. - * We haven't any direct information about the MSS. - * It's better to underestimate the RCV_MSS rather than overestimate. - * Overestimations make us ACKing less frequently than needed. - * Underestimations are more easy to detect and fix by tcp_measure_rcv_mss(). - */ +extern void tcp_initialize_rcv_mss(struct sock *sk); -static inline void tcp_initialize_rcv_mss(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); - - hint = min(hint, tp->rcv_wnd/2); - hint = min(hint, TCP_MIN_RCVMSS); - hint = max(hint, TCP_MIN_MSS); - - inet_csk(sk)->icsk_ack.rcv_mss = hint; -} - -static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) +static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) { tp->pred_flags = htonl((tp->tcp_header_len << 26) | ntohl(TCP_FLAG_ACK) | snd_wnd); } -static __inline__ void tcp_fast_path_on(struct tcp_sock *tp) +static inline void tcp_fast_path_on(struct tcp_sock *tp) { __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale); } @@ -490,7 +472,7 @@ static inline void tcp_fast_path_check(struct sock *sk, struct tcp_sock *tp) * Rcv_nxt can be after the window if our peer push more data * than the offered window. */ -static __inline__ u32 tcp_receive_window(const struct tcp_sock *tp) +static inline u32 tcp_receive_window(const struct tcp_sock *tp) { s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt; @@ -662,6 +644,7 @@ extern void tcp_cleanup_congestion_control(struct sock *sk); extern int tcp_set_default_congestion_control(const char *name); extern void tcp_get_default_congestion_control(char *name); extern int tcp_set_congestion_control(struct sock *sk, const char *name); +extern void tcp_slow_start(struct tcp_sock *tp); extern struct tcp_congestion_ops tcp_init_congestion_ops; extern u32 tcp_reno_ssthresh(struct sock *sk); @@ -701,7 +684,7 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) * "Packets left network, but not honestly ACKed yet" PLUS * "Packets fast retransmitted" */ -static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) +static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) { return (tp->packets_out - tp->left_out + tp->retrans_out); } @@ -721,33 +704,6 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) (tp->snd_cwnd >> 2))); } -/* - * Linear increase during slow start - */ -static inline void tcp_slow_start(struct tcp_sock *tp) -{ - if (sysctl_tcp_abc) { - /* RFC3465: Slow Start - * TCP sender SHOULD increase cwnd by the number of - * previously unacknowledged bytes ACKed by each incoming - * acknowledgment, provided the increase is not more than L - */ - if (tp->bytes_acked < tp->mss_cache) - return; - - /* We MAY increase by 2 if discovered delayed ack */ - if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) { - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - } - } - tp->bytes_acked = 0; - - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; -} - - static inline void tcp_sync_left_out(struct tcp_sock *tp) { if (tp->rx_opt.sack_ok && @@ -756,34 +712,7 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp) tp->left_out = tp->sacked_out + tp->lost_out; } -/* Set slow start threshold and cwnd not falling to slow start */ -static inline void __tcp_enter_cwr(struct sock *sk) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); - - tp->undo_marker = 0; - tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); - tp->snd_cwnd = min(tp->snd_cwnd, - tcp_packets_in_flight(tp) + 1U); - tp->snd_cwnd_cnt = 0; - tp->high_seq = tp->snd_nxt; - tp->snd_cwnd_stamp = tcp_time_stamp; - TCP_ECN_queue_cwr(tp); -} - -static inline void tcp_enter_cwr(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - tp->prior_ssthresh = 0; - tp->bytes_acked = 0; - if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { - __tcp_enter_cwr(sk); - tcp_set_ca_state(sk, TCP_CA_CWR); - } -} - +extern void tcp_enter_cwr(struct sock *sk); extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst); /* Slow start with delack produces 3 packets of burst, so that @@ -815,14 +744,14 @@ static inline int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) return left <= tcp_max_burst(tp); } -static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss, - const struct sk_buff *skb) +static inline void tcp_minshall_update(struct tcp_sock *tp, int mss, + const struct sk_buff *skb) { if (skb->len < mss) tp->snd_sml = TCP_SKB_CB(skb)->end_seq; } -static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp) +static inline void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp) { const struct inet_connection_sock *icsk = inet_csk(sk); if (!tp->packets_out && !icsk->icsk_pending) @@ -830,18 +759,18 @@ static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *t icsk->icsk_rto, TCP_RTO_MAX); } -static __inline__ void tcp_push_pending_frames(struct sock *sk, - struct tcp_sock *tp) +static inline void tcp_push_pending_frames(struct sock *sk, + struct tcp_sock *tp) { __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1), tp->nonagle); } -static __inline__ void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq) +static inline void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq) { tp->snd_wl1 = seq; } -static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq) +static inline void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq) { tp->snd_wl1 = seq; } @@ -849,19 +778,19 @@ static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq) /* * Calculate(/check) TCP checksum */ -static __inline__ u16 tcp_v4_check(struct tcphdr *th, int len, - unsigned long saddr, unsigned long daddr, - unsigned long base) +static inline u16 tcp_v4_check(struct tcphdr *th, int len, + unsigned long saddr, unsigned long daddr, + unsigned long base) { return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base); } -static __inline__ int __tcp_checksum_complete(struct sk_buff *skb) +static inline int __tcp_checksum_complete(struct sk_buff *skb) { return __skb_checksum_complete(skb); } -static __inline__ int tcp_checksum_complete(struct sk_buff *skb) +static inline int tcp_checksum_complete(struct sk_buff *skb) { return skb->ip_summed != CHECKSUM_UNNECESSARY && __tcp_checksum_complete(skb); @@ -869,7 +798,7 @@ static __inline__ int tcp_checksum_complete(struct sk_buff *skb) /* Prequeue for VJ style copy to user, combined with checksumming. */ -static __inline__ void tcp_prequeue_init(struct tcp_sock *tp) +static inline void tcp_prequeue_init(struct tcp_sock *tp) { tp->ucopy.task = NULL; tp->ucopy.len = 0; @@ -885,7 +814,7 @@ static __inline__ void tcp_prequeue_init(struct tcp_sock *tp) * * NOTE: is this not too big to inline? */ -static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb) +static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); @@ -926,7 +855,7 @@ static const char *statename[]={ }; #endif -static __inline__ void tcp_set_state(struct sock *sk, int state) +static inline void tcp_set_state(struct sock *sk, int state) { int oldstate = sk->sk_state; @@ -960,7 +889,7 @@ static __inline__ void tcp_set_state(struct sock *sk, int state) #endif } -static __inline__ void tcp_done(struct sock *sk) +static inline void tcp_done(struct sock *sk) { tcp_set_state(sk, TCP_CLOSE); tcp_clear_xmit_timers(sk); @@ -973,81 +902,13 @@ static __inline__ void tcp_done(struct sock *sk) inet_csk_destroy_sock(sk); } -static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt) +static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) { rx_opt->dsack = 0; rx_opt->eff_sacks = 0; rx_opt->num_sacks = 0; } -static __inline__ void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp, __u32 tstamp) -{ - if (tp->rx_opt.tstamp_ok) { - *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_TIMESTAMP << 8) | - TCPOLEN_TIMESTAMP); - *ptr++ = htonl(tstamp); - *ptr++ = htonl(tp->rx_opt.ts_recent); - } - if (tp->rx_opt.eff_sacks) { - struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks; - int this_sack; - - *ptr++ = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_SACK << 8) | - (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks * - TCPOLEN_SACK_PERBLOCK))); - for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) { - *ptr++ = htonl(sp[this_sack].start_seq); - *ptr++ = htonl(sp[this_sack].end_seq); - } - if (tp->rx_opt.dsack) { - tp->rx_opt.dsack = 0; - tp->rx_opt.eff_sacks--; - } - } -} - -/* Construct a tcp options header for a SYN or SYN_ACK packet. - * If this is every changed make sure to change the definition of - * MAX_SYN_SIZE to match the new maximum number of options that you - * can generate. - */ -static inline void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack, - int offer_wscale, int wscale, __u32 tstamp, __u32 ts_recent) -{ - /* We always get an MSS option. - * The option bytes which will be seen in normal data - * packets should timestamps be used, must be in the MSS - * advertised. But we subtract them from tp->mss_cache so - * that calculations in tcp_sendmsg are simpler etc. - * So account for this fact here if necessary. If we - * don't do this correctly, as a receiver we won't - * recognize data packets as being full sized when we - * should, and thus we won't abide by the delayed ACK - * rules correctly. - * SACKs don't matter, we never delay an ACK when we - * have any of those going out. - */ - *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); - if (ts) { - if(sack) - *ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) | - (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); - else - *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | - (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); - *ptr++ = htonl(tstamp); /* TSVAL */ - *ptr++ = htonl(ts_recent); /* TSECR */ - } else if(sack) - *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | - (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM); - if (offer_wscale) - *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale)); -} - /* Determine a window scaling and initial window to offer. */ extern void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, @@ -1072,9 +933,9 @@ static inline int tcp_full_space(const struct sock *sk) return tcp_win_from_space(sk->sk_rcvbuf); } -static __inline__ void tcp_openreq_init(struct request_sock *req, - struct tcp_options_received *rx_opt, - struct sk_buff *skb) +static inline void tcp_openreq_init(struct request_sock *req, + struct tcp_options_received *rx_opt, + struct sk_buff *skb) { struct inet_request_sock *ireq = inet_rsk(req); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index c7cc62c..e688c68 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -174,6 +174,34 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) return err; } + +/* + * Linear increase during slow start + */ +void tcp_slow_start(struct tcp_sock *tp) +{ + if (sysctl_tcp_abc) { + /* RFC3465: Slow Start + * TCP sender SHOULD increase cwnd by the number of + * previously unacknowledged bytes ACKed by each incoming + * acknowledgment, provided the increase is not more than L + */ + if (tp->bytes_acked < tp->mss_cache) + return; + + /* We MAY increase by 2 if discovered delayed ack */ + if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) { + if (tp->snd_cwnd < tp->snd_cwnd_clamp) + tp->snd_cwnd++; + } + } + tp->bytes_acked = 0; + + if (tp->snd_cwnd < tp->snd_cwnd_clamp) + tp->snd_cwnd++; +} +EXPORT_SYMBOL_GPL(tcp_slow_start); + /* * TCP Reno congestion control * This is special case used for fallback as well. diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 981d120..0a46123 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -115,8 +115,8 @@ int sysctl_tcp_abc = 1; /* Adapt the MSS value used to make delayed ack decision to the * real world. */ -static inline void tcp_measure_rcv_mss(struct sock *sk, - const struct sk_buff *skb) +static void tcp_measure_rcv_mss(struct sock *sk, + const struct sk_buff *skb) { struct inet_connection_sock *icsk = inet_csk(sk); const unsigned int lss = icsk->icsk_ack.last_seg_size; @@ -246,8 +246,8 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp, return 0; } -static inline void tcp_grow_window(struct sock *sk, struct tcp_sock *tp, - struct sk_buff *skb) +static void tcp_grow_window(struct sock *sk, struct tcp_sock *tp, + struct sk_buff *skb) { /* Check #1 */ if (tp->rcv_ssthresh < tp->window_clamp && @@ -341,6 +341,26 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss); } + +/* Initialize RCV_MSS value. + * RCV_MSS is an our guess about MSS used by the peer. + * We haven't any direct information about the MSS. + * It's better to underestimate the RCV_MSS rather than overestimate. + * Overestimations make us ACKing less frequently than needed. + * Underestimations are more easy to detect and fix by tcp_measure_rcv_mss(). + */ +void tcp_initialize_rcv_mss(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); + + hint = min(hint, tp->rcv_wnd/2); + hint = min(hint, TCP_MIN_RCVMSS); + hint = max(hint, TCP_MIN_MSS); + + inet_csk(sk)->icsk_ack.rcv_mss = hint; +} + /* Receiver "autotuning" code. * * The algorithm for RTT estimation w/o timestamps is based on @@ -735,6 +755,27 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) return min_t(__u32, cwnd, tp->snd_cwnd_clamp); } +/* Set slow start threshold and cwnd not falling to slow start */ +void tcp_enter_cwr(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + tp->prior_ssthresh = 0; + tp->bytes_acked = 0; + if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { + tp->undo_marker = 0; + tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); + tp->snd_cwnd = min(tp->snd_cwnd, + tcp_packets_in_flight(tp) + 1U); + tp->snd_cwnd_cnt = 0; + tp->high_seq = tp->snd_nxt; + tp->snd_cwnd_stamp = tcp_time_stamp; + TCP_ECN_queue_cwr(tp); + + tcp_set_ca_state(sk, TCP_CA_CWR); + } +} + /* Initialize metrics on socket. */ static void tcp_init_metrics(struct sock *sk) @@ -2070,8 +2111,8 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, tcp_ack_no_tstamp(sk, seq_rtt, flag); } -static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, - u32 in_flight, int good) +static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, + u32 in_flight, int good) { const struct inet_connection_sock *icsk = inet_csk(sk); icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good); @@ -2082,7 +2123,7 @@ static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, * RFC2988 recommends to restart timer to now+rto. */ -static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp) +static void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp) { if (!tp->packets_out) { inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); @@ -2147,7 +2188,7 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb, return acked; } -static inline u32 tcp_usrtt(const struct sk_buff *skb) +static u32 tcp_usrtt(const struct sk_buff *skb) { struct timeval tv, now; @@ -2583,8 +2624,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, /* Fast parse options. This hopes to only see timestamps. * If it is wrong it falls back on tcp_parse_options(). */ -static inline int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, - struct tcp_sock *tp) +static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, + struct tcp_sock *tp) { if (th->doff == sizeof(struct tcphdr)>>2) { tp->rx_opt.saw_tstamp = 0; @@ -2804,8 +2845,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) } } -static __inline__ int -tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq) +static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq) { if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) { if (before(seq, sp->start_seq)) @@ -2817,7 +2857,7 @@ tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq) return 0; } -static inline void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq) +static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq) { if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) { if (before(seq, tp->rcv_nxt)) @@ -2832,7 +2872,7 @@ static inline void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq) } } -static inline void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq) +static void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq) { if (!tp->rx_opt.dsack) tcp_dsack_set(tp, seq, end_seq); @@ -2890,7 +2930,7 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp) } } -static __inline__ void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2) +static inline void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2) { __u32 tmp; @@ -3455,7 +3495,7 @@ void tcp_cwnd_application_limited(struct sock *sk) tp->snd_cwnd_stamp = tcp_time_stamp; } -static inline int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp) +static int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp) { /* If the user specified a specific send buffer setting, do * not modify it. @@ -3502,7 +3542,7 @@ static void tcp_new_space(struct sock *sk) sk->sk_write_space(sk); } -static inline void tcp_check_space(struct sock *sk) +static void tcp_check_space(struct sock *sk) { if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) { sock_reset_flag(sk, SOCK_QUEUE_SHRUNK); @@ -3512,7 +3552,7 @@ static inline void tcp_check_space(struct sock *sk) } } -static __inline__ void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp) +static inline void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp) { tcp_push_pending_frames(sk, tp); tcp_check_space(sk); @@ -3544,7 +3584,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) } } -static __inline__ void tcp_ack_snd_check(struct sock *sk) +static inline void tcp_ack_snd_check(struct sock *sk) { if (!inet_csk_ack_scheduled(sk)) { /* We sent a data segment already. */ @@ -3692,8 +3732,7 @@ static int __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) return result; } -static __inline__ int -tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) +static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) { return skb->ip_summed != CHECKSUM_UNNECESSARY && __tcp_checksum_complete_user(sk, skb); @@ -4474,3 +4513,4 @@ EXPORT_SYMBOL(sysctl_tcp_abc); EXPORT_SYMBOL(tcp_parse_options); EXPORT_SYMBOL(tcp_rcv_established); EXPORT_SYMBOL(tcp_rcv_state_process); +EXPORT_SYMBOL(tcp_initialize_rcv_mss); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9b62d80..5c70493 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -270,8 +270,7 @@ failure: /* * This routine does path mtu discovery as defined in RFC1191. */ -static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, - u32 mtu) +static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu) { struct dst_entry *dst; struct inet_sock *inet = inet_sk(sk); @@ -662,7 +661,7 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) kfree(inet_rsk(req)->opt); } -static inline void syn_flood_warning(struct sk_buff *skb) +static void syn_flood_warning(struct sk_buff *skb) { static unsigned long warntime; @@ -677,8 +676,8 @@ static inline void syn_flood_warning(struct sk_buff *skb) /* * Save and compile IPv4 options into the request_sock if needed. */ -static inline struct ip_options *tcp_v4_save_options(struct sock *sk, - struct sk_buff *skb) +static struct ip_options *tcp_v4_save_options(struct sock *sk, + struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); struct ip_options *dopt = NULL; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3a0a914..a7623ea 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -51,8 +51,8 @@ int sysctl_tcp_retrans_collapse = 1; */ int sysctl_tcp_tso_win_divisor = 3; -static inline void update_send_head(struct sock *sk, struct tcp_sock *tp, - struct sk_buff *skb) +static void update_send_head(struct sock *sk, struct tcp_sock *tp, + struct sk_buff *skb) { sk->sk_send_head = skb->next; if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue) @@ -124,8 +124,8 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst) tp->snd_cwnd_used = 0; } -static inline void tcp_event_data_sent(struct tcp_sock *tp, - struct sk_buff *skb, struct sock *sk) +static void tcp_event_data_sent(struct tcp_sock *tp, + struct sk_buff *skb, struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); const u32 now = tcp_time_stamp; @@ -142,7 +142,7 @@ static inline void tcp_event_data_sent(struct tcp_sock *tp, icsk->icsk_ack.pingpong = 1; } -static __inline__ void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) +static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) { tcp_dec_quickack_mode(sk, pkts); inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); @@ -212,7 +212,7 @@ void tcp_select_initial_window(int __space, __u32 mss, * value can be stuffed directly into th->window for an outgoing * frame. */ -static __inline__ u16 tcp_select_window(struct sock *sk) +static u16 tcp_select_window(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); u32 cur_win = tcp_receive_window(tp); @@ -250,6 +250,75 @@ static __inline__ u16 tcp_select_window(struct sock *sk) return new_win; } +static void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp, + __u32 tstamp) +{ + if (tp->rx_opt.tstamp_ok) { + *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); + *ptr++ = htonl(tstamp); + *ptr++ = htonl(tp->rx_opt.ts_recent); + } + if (tp->rx_opt.eff_sacks) { + struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks; + int this_sack; + + *ptr++ = htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_SACK << 8) | + (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks * + TCPOLEN_SACK_PERBLOCK))); + for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) { + *ptr++ = htonl(sp[this_sack].start_seq); + *ptr++ = htonl(sp[this_sack].end_seq); + } + if (tp->rx_opt.dsack) { + tp->rx_opt.dsack = 0; + tp->rx_opt.eff_sacks--; + } + } +} + +/* Construct a tcp options header for a SYN or SYN_ACK packet. + * If this is every changed make sure to change the definition of + * MAX_SYN_SIZE to match the new maximum number of options that you + * can generate. + */ +static void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack, + int offer_wscale, int wscale, __u32 tstamp, + __u32 ts_recent) +{ + /* We always get an MSS option. + * The option bytes which will be seen in normal data + * packets should timestamps be used, must be in the MSS + * advertised. But we subtract them from tp->mss_cache so + * that calculations in tcp_sendmsg are simpler etc. + * So account for this fact here if necessary. If we + * don't do this correctly, as a receiver we won't + * recognize data packets as being full sized when we + * should, and thus we won't abide by the delayed ACK + * rules correctly. + * SACKs don't matter, we never delay an ACK when we + * have any of those going out. + */ + *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); + if (ts) { + if(sack) + *ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) | + (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); + else + *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); + *ptr++ = htonl(tstamp); /* TSVAL */ + *ptr++ = htonl(ts_recent); /* TSECR */ + } else if(sack) + *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | + (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM); + if (offer_wscale) + *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale)); +} /* This routine actually transmits TCP packets queued in by * tcp_do_sendmsg(). This is used by both the initial @@ -724,7 +793,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) /* Congestion window validation. (RFC2861) */ -static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp) +static void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp) { __u32 packets_out = tp->packets_out; @@ -773,7 +842,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk /* This must be invoked the first time we consider transmitting * SKB onto the wire. */ -static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) +static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { int tso_segs = tcp_skb_pcount(skb); @@ -1794,7 +1863,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, /* * Do all connect socket setups that can be done AF independent. */ -static inline void tcp_connect_init(struct sock *sk) +static void tcp_connect_init(struct sock *sk) { struct dst_entry *dst = __sk_dst_get(sk); struct tcp_sock *tp = tcp_sk(sk); |