Skip to content

Commit

Permalink
Handle TCP_INFO socket option
Browse files Browse the repository at this point in the history
  • Loading branch information
tbodt committed Jan 20, 2020
1 parent 0a01c66 commit 66f6708
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 1 deletion.
64 changes: 63 additions & 1 deletion fs/sock.c
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
#include <fcntl.h>
#include <netinet/tcp.h>
#if defined(__APPLE__)
#include <netinet/tcp_fsm.h>
#endif
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/stat.h>
#include <sys/un.h>
#include "kernel/calls.h"
#include "fs/fd.h"
#include "fs/inode.h"
Expand Down Expand Up @@ -632,6 +636,8 @@ int_t sys_shutdown(fd_t sock_fd, dword_t how) {
return 0;
}

// Congestion control algorithm name used for the TCP_CONGESTION_ socket
// option: sys_getsockopt copies this string out to the caller, and on Darwin
// sys_setsockopt accepts only this name (anything else gets _ENOENT).
#define DEFAULT_TCP_CONGESTION "cubic"

int_t sys_setsockopt(fd_t sock_fd, dword_t level, dword_t option, addr_t value_addr, dword_t value_len) {
STRACE("setsockopt(%d, %d, %d, 0x%x, %d)", sock_fd, level, option, value_addr, value_len);
struct fd *sock = sock_getfd(sock_fd);
Expand All @@ -647,6 +653,14 @@ int_t sys_setsockopt(fd_t sock_fd, dword_t level, dword_t option, addr_t value_a
// IP_MTU_DISCOVER has no equivalent on Darwin
if (level == IPPROTO_IP && option == IP_MTU_DISCOVER_)
return 0;
// TCP_CONGESTION also has no equivalent on Darwin
#if defined(__APPLE__)
if (level == IPPROTO_TCP && option == TCP_CONGESTION_) {
if (strncmp(value, DEFAULT_TCP_CONGESTION, sizeof(value)) == 0)
return 0;
return _ENOENT;
}
#endif

int real_opt = sock_opt_to_real(option, level);
if (real_opt < 0)
Expand Down Expand Up @@ -700,6 +714,54 @@ int_t sys_getsockopt(fd_t sock_fd, dword_t level, dword_t option, addr_t value_a
*cred = sock->socket.unix_peer->socket.unix_cred;
}
unlock(&peer_lock);
} else if (level == IPPROTO_TCP && option == TCP_CONGESTION_) {
value_len = strlen(DEFAULT_TCP_CONGESTION);
memcpy(value, DEFAULT_TCP_CONGESTION, value_len);
#if defined(__APPLE__)
} else if (level == IPPROTO_TCP && option == TCP_INFO_) {
// This one's fun. On Linux, the struct is not ABI dependent, so no
// special handling is needed. On Darwin, the struct is completely
// different and has a different sockopt name.
struct tcp_connection_info conn_info;
socklen_t conn_info_size = sizeof(conn_info);
int err = getsockopt(sock->real_fd, IPPROTO_TCP, TCP_CONNECTION_INFO, &conn_info, &conn_info_size);
if (err < 0)
return errno_map();

static const uint8_t tcp_state_table[] = {
[TCPS_CLOSED] = 7,
[TCPS_LISTEN] = 10,
[TCPS_SYN_SENT] = 2,
[TCPS_SYN_RECEIVED] = 3,
[TCPS_ESTABLISHED] = 1,
[TCPS_CLOSE_WAIT] = 8,
[TCPS_FIN_WAIT_1] = 4,
[TCPS_CLOSING] = 11,
[TCPS_LAST_ACK] = 9,
[TCPS_FIN_WAIT_2] = 5,
[TCPS_TIME_WAIT] = 6,
};
struct tcp_info_ info = {
.state = tcp_state_table[conn_info.tcpi_state],
.options = conn_info.tcpi_options,
.snd_wscale = conn_info.tcpi_snd_wscale,
.rcv_wscale = conn_info.tcpi_rcv_wscale,

.rto = conn_info.tcpi_rto * 1000,
.snd_mss = conn_info.tcpi_maxseg,

.rtt = conn_info.tcpi_srtt * 1000,
.rttvar = conn_info.tcpi_rttvar * 1000,
.snd_ssthresh = conn_info.tcpi_snd_ssthresh,
.snd_cwnd = conn_info.tcpi_snd_cwnd / conn_info.tcpi_maxseg,

// https://lkml.org/lkml/2017/4/24/923
.total_retrans = conn_info.tcpi_txretransmitpackets,
};
if (value_len > sizeof(struct tcp_info_))
value_len = sizeof(struct tcp_info_);
memcpy(value, &info, value_len);
#endif
} else {
int real_opt = sock_opt_to_real(option, level);
if (real_opt < 0)
Expand Down
46 changes: 46 additions & 0 deletions fs/sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ static inline int sock_flags_from_real(int real) {
#define IP_RECVTOS_ 13
#define TCP_NODELAY_ 1
#define TCP_DEFER_ACCEPT_ 9
#define TCP_INFO_ 11
#define TCP_CONGESTION_ 13
#define IPV6_UNICAST_HOPS_ 16
#define IPV6_V6ONLY_ 26
#define IPV6_TCLASS_ 67
Expand All @@ -211,6 +213,10 @@ static inline int sock_opt_to_real(int fake, int level) {
case IPPROTO_TCP: switch (fake) {
case TCP_NODELAY_: return TCP_NODELAY;
case TCP_DEFER_ACCEPT_: return 0; // unimplemented
#if defined(__linux__)
case TCP_INFO_: return TCP_INFO;
case TCP_CONGESTION_: return TCP_CONGESTION;
#endif
} break;
case IPPROTO_IP: switch (fake) {
case IP_TOS_: return IP_TOS;
Expand All @@ -237,4 +243,44 @@ static inline int sock_level_to_real(int fake) {

extern const char *sock_tmp_prefix;

// Structure copied out to the caller for the TCP_INFO_ socket option.
// The member order and widths define the byte layout the caller sees, so
// members must not be reordered, resized, or removed.
// On Darwin, sys_getsockopt fills only a subset of these members from
// struct tcp_connection_info; every member it does not name stays zero.
struct tcp_info_ {
    // Single-byte members.
    uint8_t state;
    uint8_t ca_state;
    uint8_t retransmits;
    uint8_t probes;
    uint8_t backoff;
    uint8_t options;
    // Two 4-bit fields declared back to back: send side first, then receive.
    uint8_t snd_wscale:4;
    uint8_t rcv_wscale:4;

    // 32-bit members from here to the end of the structure.
    uint32_t rto;
    uint32_t ato;
    uint32_t snd_mss;
    uint32_t rcv_mss;

    // Segment counters.
    uint32_t unacked;
    uint32_t sacked;
    uint32_t lost;
    uint32_t retrans;
    uint32_t fackets;

    // Timestamps of the most recent send/receive events.
    uint32_t last_data_sent;
    uint32_t last_ack_sent;
    uint32_t last_data_recv;
    uint32_t last_ack_recv;

    // Path and congestion metrics.
    uint32_t pmtu;
    uint32_t rcv_ssthresh;
    uint32_t rtt;
    uint32_t rttvar;
    uint32_t snd_ssthresh;
    uint32_t snd_cwnd;
    uint32_t advmss;
    uint32_t reordering;

    // Receiver-side estimates.
    uint32_t rcv_rtt;
    uint32_t rcv_space;

    // Retransmission total for the connection.
    uint32_t total_retrans;
};

#endif

0 comments on commit 66f6708

Please sign in to comment.