假定客户端执行主动打开,服务器执行被动打开,客户端发送syn包到服务器,服务器接收该包,进行建立连接请求的相关处理,即第一次握手;本文主要分析第一次握手中被动打开端的处理流程,主动打开端的处理请查阅本博客内另外的文章;
IPv4携带的TCP报文最终会进入到tcp_v4_do_rcv函数;服务器准备接收连接请求时处于LISTEN状态,所以我们只关心这部分的相关处理。函数的LISTEN条件分支主要是启用syn cookies时的检查(tcp_v4_cookie_check),我们暂且不做分析;主要看tcp_rcv_state_process这个函数,syn连接请求最终会进入到该函数中进行处理;
/*
 * tcp_v4_do_rcv - handle one received TCP segment for socket sk,
 * dispatching on sk->sk_state.
 *
 * ESTABLISHED sockets take the fast path through tcp_rcv_established().
 * A LISTEN socket first goes through tcp_v4_cookie_check(); every other
 * case (including LISTEN when the cookie check returns sk itself) is
 * handed to tcp_rcv_state_process().
 *
 * Every path in this function returns 0. On the reset/discard/csum_err
 * paths the skb is freed here with kfree_skb().
 */
1 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
2 {
3 struct sock *rsk; /* socket on whose behalf a reset is sent (set before "goto reset") */
4
5 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
6 struct dst_entry *dst = sk->sk_rx_dst;
7
8 sock_rps_save_rxhash(sk, skb);
9 sk_mark_napi_id(sk, skb);
10 if (dst) { /* release the cached rx dst if the ingress ifindex differs or dst->ops->check() fails */
11 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
12 !dst->ops->check(dst, 0)) {
13 dst_release(dst);
14 sk->sk_rx_dst = NULL;
15 }
16 }
17 tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
18 return 0;
19 }
20
21 if (tcp_checksum_complete(skb))
22 goto csum_err;
23
24 /* LISTEN state handling */
25 if (sk->sk_state == TCP_LISTEN) {
26
27 /* SYN cookie check */
28 struct sock *nsk = tcp_v4_cookie_check(sk, skb);
29
30 if (!nsk) /* cookie check returned NULL: drop the skb */
31 goto discard;
32 if (nsk != sk) { /* a different socket was returned: hand the skb to it */
33 if (tcp_child_process(sk, nsk, skb)) {
34 rsk = nsk;
35 goto reset;
36 }
37 return 0;
38 }
39 } else
40 sock_rps_save_rxhash(sk, skb);
41
42 /* Handling for all states other than ESTABLISHED and TIME_WAIT */
43 if (tcp_rcv_state_process(sk, skb)) { /* non-zero return: answer with a reset */
44 rsk = sk;
45 goto reset;
46 }
47 return 0;
48
49 reset:
50 tcp_v4_send_reset(rsk, skb);
51 discard:
52 kfree_skb(skb);
53 /* Be careful here. If this function gets more complicated and
54 * gcc suffers from register pressure on the x86, sk (in %ebx)
55 * might be destroyed here. This current version compiles correctly,
56 * but you have been warned.
57 */
58 return 0;
59
60 csum_err: /* bad checksum: bump both error counters, then drop the skb */
61 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
62 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
63 goto discard;
64 }
tcp_rcv_state_process对syn包进行处理:LISTEN状态下不接收ack包(返回1,由调用者tcp_v4_do_rcv发送rst),丢弃带有rst标志的包,以及同时带有fin标志的syn包;对于合格的syn请求包,则继续调用conn_request回调进行处理,TCPv4中对应的函数为tcp_v4_conn_request;
/*
 * tcp_rcv_state_process - excerpt showing only the TCP_CLOSE and
 * TCP_LISTEN cases; the declarations of th, icsk and acceptable and the
 * "discard" label are in the omitted code.
 *
 * In the LISTEN case shown here the function returns 1 when the segment
 * carries ACK or when the conn_request callback returns a negative
 * value, and 0 after a SYN has been accepted and the skb consumed.
 * The caller tcp_v4_do_rcv() sends a reset on a non-zero return.
 */
1 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
2 {
3 /* Some unrelated code omitted */
4
5 switch (sk->sk_state) {
6 case TCP_CLOSE:
7 goto discard;
8
9 case TCP_LISTEN:
10 /* ACKs are not accepted */
11 if (th->ack)
12 return 1;
13
14 /* Drop packets that carry the RST flag */
15 if (th->rst)
16 goto discard;
17
18 /* Handle a SYN request packet */
19 if (th->syn) {
20 /* Drop packets that also carry the FIN flag */
21 if (th->fin)
22 goto discard;
23 /* It is possible that we process SYN packets from backlog,
24 * so we need to make sure to disable BH right there.
25 */
26 local_bh_disable();
27 /* Enter connection-request processing; a return value >= 0 counts as acceptable */
28 acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
29 local_bh_enable();
30
31 /* Connection request failed */
32 if (!acceptable)
33 return 1;
34
35 /* Connection request succeeded */
36 consume_skb(skb);
37 return 0;
38 }
39 goto discard; /* neither ACK, RST nor SYN: discard */
40 }
41 /* Some unrelated code omitted */
42 }
tcp_v4_conn_request函数对传入包的路由类型进行检查,如果是发往广播或者组播的,则丢弃该包,合法包进入tcp_conn_request函数继续进行请求处理,其中参数传入了请求控制块操作函数结构指针;
/*
 * tcp_v4_conn_request - IPv4 entry point for an incoming connection
 * request on listener sk.
 *
 * If the skb's route is flagged broadcast or multicast, the request is
 * counted as a listen drop and 0 is returned. Otherwise the work is
 * delegated to tcp_conn_request() with the IPv4 request-sock operation
 * tables, and its return value is passed through.
 */
1 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
2 {
3 /* Never answer to SYNs send to broadcast or multicast */
4 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
5 goto drop;
6
7 return tcp_conn_request(&tcp_request_sock_ops,
8 &tcp_request_sock_ipv4_ops, sk, skb);
9
10 drop:
11 tcp_listendrop(sk); /* account the drop against the listener */
12 return 0;
13 }
tcp_conn_request函数为syn请求的核心处理流程,我们暂且忽略其中的syn cookies和fastopen相关流程,其核心功能为分析请求参数,新建连接请求控制块;注意,新建请求控制块的操作中会将其状态设置为TCP_NEW_SYN_RECV,并初始化相关成员;初始化完毕之后,将请求控制块加入半连接队列(通过inet_csk_reqsk_queue_hash_add加入ehash并启动重传定时器,注意不是全连接的accept queue),然后回复syn+ack包给客户端;
/*
 * tcp_conn_request - core handling of an incoming SYN on listener sk.
 *
 * @rsk_ops: request-sock operations, passed to inet_reqsk_alloc(); its
 *           slab_name and family fields are also read here.
 * @af_ops:  address-family specific callbacks and values used below
 *           (mss_clamp, init_req, init_ts_off, init_seq, route_req,
 *           send_synack).
 * @sk:      the listening socket.
 * @skb:     the received SYN segment.
 *
 * Allocates and initializes a request sock from the SYN, picks an
 * initial sequence number, resolves a route, then sends a SYN+ACK via
 * af_ops->send_synack(). In the syn-cookie case the request sock is
 * freed right after the SYN+ACK is sent. Every path returns 0; the
 * drop paths call tcp_listendrop(sk).
 */
1 int tcp_conn_request(struct request_sock_ops *rsk_ops,
2 const struct tcp_request_sock_ops *af_ops,
3 struct sock *sk, struct sk_buff *skb)
4 {
5 struct tcp_fastopen_cookie foc = { .len = -1 };
6 __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
7 struct tcp_options_received tmp_opt;
8 struct tcp_sock *tp = tcp_sk(sk);
9 struct net *net = sock_net(sk);
10 struct sock *fastopen_sk = NULL;
11 struct dst_entry *dst = NULL;
12 struct request_sock *req;
13 bool want_cookie = false;
14 struct flowi fl;
15
16 /* TW buckets are converted to open requests without
17 * limitations, they conserve resources and peer is
18 * evidently real one.
19 */
20 if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
21 inet_csk_reqsk_queue_is_full(sk)) && !isn) {
22 want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name);
23 if (!want_cookie)
24 goto drop;
25 }
26
27 /* If the accept queue has reached its limit, drop the packet */
28 if (sk_acceptq_is_full(sk)) {
29 NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
30 goto drop;
31 }
32
33 /*
34 Allocate the request control block; its operations point to rsk_ops.
35 Note (per the original author; not visible here): this function sets the state to TCP_NEW_SYN_RECV
36 */
37 req = inet_reqsk_alloc(rsk_ops, sk, !want_cookie);
38 if (!req)
39 goto drop;
40
41 /* Initialize the address-family specific operations */
42 tcp_rsk(req)->af_specific = af_ops;
43 tcp_rsk(req)->ts_off = 0;
44
45 /* Clear the fields that hold the TCP options */
46 tcp_clear_options(&tmp_opt);
47
48 /* Initialize the maximum MSS (clamp) */
49 tmp_opt.mss_clamp = af_ops->mss_clamp;
50 /* Initialize the user-defined MSS */
51 tmp_opt.user_mss = tp->rx_opt.user_mss;
52
53 /* Parse the TCP options; per the original author, the smaller of user_mss and the peer-advertised MSS ends up in mss_clamp (not visible here) */
54 tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
55
56 if (want_cookie && !tmp_opt.saw_tstamp)
57 tcp_clear_options(&tmp_opt);
58
59 /* Record whether the SYN carried a timestamp option */
60 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
61
62 /* Initialize the request control block from the peer's information */
63 tcp_openreq_init(req, &tmp_opt, skb, sk);
64
65 /* Skip the source address check? Copied from the listener's transparent flag. NOTE(review): confirm semantics */
66 inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent;
67
68 /* Note: tcp_v6_init_req() might override ir_iif for link locals */
69 inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);
70
71 /* Initialize the destination address, source address and IP options in the control block */
72 af_ops->init_req(req, sk, skb);
73
74 if (security_inet_conn_request(sk, skb, req))
75 goto drop_and_free;
76
77 /* Timestamp option present: presumably computes the timestamp offset (confirm) */
78 if (tmp_opt.tstamp_ok)
79 tcp_rsk(req)->ts_off = af_ops->init_ts_off(skb);
80
81 /* No cookie wanted and the sequence number is not yet initialized */
82 if (!want_cookie && !isn) {
83 /* Kill the following clause, if you dislike this way. */
84 /* syncookies disabled && remaining backlog room below a quarter (>> 2) of sysctl_max_syn_backlog && peer not proven */
85 if (!net->ipv4.sysctl_tcp_syncookies &&
86 (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
87 (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
88 !tcp_peer_is_proven(req, dst)) {
89 /* Without syncookies last quarter of
90 * backlog is filled with destinations,
91 * proven to be alive.
92 * It means that we continue to communicate
93 * to destinations, already remembered
94 * to the moment of synflood.
95 */
96 pr_drop_req(req, ntohs(tcp_hdr(skb)->source),
97 rsk_ops->family);
98 goto drop_and_release;
99 }
100
101 /* Initialize the sequence number (per the original author: from source/destination addresses and ports) */
102 isn = af_ops->init_seq(skb);
103 }
104
105 /* No route yet: look one up */
106 if (!dst) {
107 dst = af_ops->route_req(sk, &fl, req);
108 if (!dst)
109 goto drop_and_free;
110 }
111
112 /* ECN related */
113 tcp_ecn_create_request(req, skb, sk, dst);
114
115 /* SYN cookie related */
116 if (want_cookie) {
117 isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
118 req->cookie_ts = tmp_opt.tstamp_ok;
119 if (!tmp_opt.tstamp_ok)
120 inet_rsk(req)->ecn_ok = 0;
121 }
122
123 /* Initialize the sent sequence number and the tx hash */
124 tcp_rsk(req)->snt_isn = isn;
125 tcp_rsk(req)->txhash = net_tx_rndhash();
126
127 /* Window-related initialization (todo) */
128 tcp_openreq_init_rwin(req, sk, dst);
129
130 if (!want_cookie) {
131 /* Record the SYN header */
132 tcp_reqsk_record_syn(sk, req, skb);
133 fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
134 }
135
136 /* Fast Open related */
137 if (fastopen_sk) {
138 af_ops->send_synack(fastopen_sk, dst, &fl, req,
139 &foc, TCP_SYNACK_FASTOPEN);
140 /* Add the child socket directly into the accept queue */
141 inet_csk_reqsk_queue_add(sk, req, fastopen_sk);
142 sk->sk_data_ready(sk);
143 bh_unlock_sock(fastopen_sk);
144 sock_put(fastopen_sk);
145 } else {
146 /* Not Fast Open */
147 tcp_rsk(req)->tfo_listener = false;
148
149 /* Add to ehash and start the request retransmit timer (per the original author) */
150 if (!want_cookie)
151 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
152
153 /* Send the SYN+ACK */
154 af_ops->send_synack(sk, dst, &fl, req, &foc,
155 !want_cookie ? TCP_SYNACK_NORMAL :
156 TCP_SYNACK_COOKIE);
157 if (want_cookie) { /* cookie mode: the request sock is freed right after the SYN+ACK */
158 reqsk_free(req);
159 return 0;
160 }
161 }
162 reqsk_put(req);
163 return 0;
164
165 drop_and_release: /* error unwinding: each label falls through to the next */
166 dst_release(dst);
167 drop_and_free:
168 reqsk_free(req);
169 drop:
170 tcp_listendrop(sk);
171 return 0;
172 }