diff --git a/bsd/man/man2/getattrlist.2 b/bsd/man/man2/getattrlist.2 index a92c08dcd..b790fe013 100644 --- a/bsd/man/man2/getattrlist.2 +++ b/bsd/man/man2/getattrlist.2 @@ -1698,6 +1698,10 @@ or .Em attrBuf points to an invalid address. . +.It Bq Er ERANGE +.Fa attrBufSize +is too small to hold a u_int32_t. +. .It Bq Er EINVAL The .Fa bitmapcount diff --git a/bsd/net/dlil.c b/bsd/net/dlil.c index c4acddac2..0438c30a1 100644 --- a/bsd/net/dlil.c +++ b/bsd/net/dlil.c @@ -67,7 +67,9 @@ #include #include +#include #include +#include #include #include #include diff --git a/bsd/net/if.c b/bsd/net/if.c index 33e626245..0fe5872ea 100644 --- a/bsd/net/if.c +++ b/bsd/net/if.c @@ -1139,7 +1139,26 @@ ifa_ifwithnet_common(const struct sockaddr *addr, unsigned int ifscope) /* * Find an interface address specific to an interface best matching - * a given address. + * a given address applying the same source address selection rules + * as done in the kernel for implicit source address binding + */ +struct ifaddr * +ifaof_ifpforaddr_select(const struct sockaddr *addr, struct ifnet *ifp) +{ + u_int af = addr->sa_family; + + if (af == AF_INET6) + return (in6_selectsrc_core_ifa(__DECONST(struct sockaddr_in6 *, addr), ifp, 0)); + + return (ifaof_ifpforaddr(addr, ifp)); +} + +/* + * Find an interface address specific to an interface best matching + * a given address without regard to source address selection. + * + * This is appropriate for use-cases where we just want to update/init + * some data structure like routing table entries. 
*/ struct ifaddr * ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp) @@ -1311,6 +1330,7 @@ if_updown( struct ifnet *ifp, int up) } ifnet_touch_lastchange(ifp); + ifnet_touch_lastupdown(ifp); /* Drop the lock to notify addresses and route */ ifnet_lock_done(ifp); diff --git a/bsd/net/if_fake.c b/bsd/net/if_fake.c index a4c7a69cb..96f8a3e43 100644 --- a/bsd/net/if_fake.c +++ b/bsd/net/if_fake.c @@ -508,6 +508,7 @@ copy_mbuf(struct mbuf *m) } mbuf_setlen(copy_m, pkt_len); copy_m->m_pkthdr.len = pkt_len; + copy_m->m_pkthdr.pkt_svc = m->m_pkthdr.pkt_svc; offset = 0; while (m != NULL && offset < pkt_len) { uint32_t frag_len; diff --git a/bsd/net/if_ipsec.c b/bsd/net/if_ipsec.c index 7bf02edb9..098420596 100644 --- a/bsd/net/if_ipsec.c +++ b/bsd/net/if_ipsec.c @@ -196,7 +196,7 @@ struct ipsec_pcb { u_int32_t ipsec_tx_fsw_ring_size; u_int32_t ipsec_rx_fsw_ring_size; bool ipsec_use_netif; - + bool ipsec_needs_netagent; #endif // IPSEC_NEXUS }; @@ -370,6 +370,24 @@ ipsec_interface_isvalid (ifnet_t interface) return 1; } +boolean_t +ipsec_interface_needs_netagent(ifnet_t interface) +{ + struct ipsec_pcb *pcb = NULL; + + if (interface == NULL) { + return (FALSE); + } + + pcb = ifnet_softc(interface); + + if (pcb == NULL) { + return (FALSE); + } + + return (pcb->ipsec_needs_netagent == true); +} + static errno_t ipsec_ifnet_set_attrs(ifnet_t ifp) { @@ -2755,9 +2773,11 @@ ipsec_ctl_setopt(__unused kern_ctl_ref kctlref, } if (*(int *)data) { - if_add_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.ms_agent); + if_add_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.ms_agent); + pcb->ipsec_needs_netagent = true; } else { - if_delete_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.ms_agent); + pcb->ipsec_needs_netagent = false; + if_delete_netagent(pcb->ipsec_ifp, pcb->ipsec_nx.ms_agent); } break; } diff --git a/bsd/net/if_ipsec.h b/bsd/net/if_ipsec.h index 93429c1ad..ca18916cf 100644 --- a/bsd/net/if_ipsec.h +++ b/bsd/net/if_ipsec.h @@ -40,6 +40,7 @@ errno_t ipsec_register_control(void); /* 
Helpers */ int ipsec_interface_isvalid (ifnet_t interface); +boolean_t ipsec_interface_needs_netagent(ifnet_t interface); errno_t ipsec_inject_inbound_packet(ifnet_t interface, mbuf_t packet); diff --git a/bsd/net/if_utun.c b/bsd/net/if_utun.c index ac64bab88..225d9c21c 100644 --- a/bsd/net/if_utun.c +++ b/bsd/net/if_utun.c @@ -114,6 +114,7 @@ struct utun_pcb { u_int32_t utun_tx_fsw_ring_size; u_int32_t utun_rx_fsw_ring_size; bool utun_use_netif; + bool utun_needs_netagent; #endif // UTUN_NEXUS }; @@ -2083,7 +2084,9 @@ utun_ctl_setopt(__unused kern_ctl_ref kctlref, if (*(int *)data) { if_add_netagent(pcb->utun_ifp, pcb->utun_nx.ms_agent); + pcb->utun_needs_netagent = true; } else { + pcb->utun_needs_netagent = false; if_delete_netagent(pcb->utun_ifp, pcb->utun_nx.ms_agent); } break; @@ -2794,6 +2797,23 @@ utun_register_nexus(void) } return (0); } +boolean_t +utun_interface_needs_netagent(ifnet_t interface) +{ + struct utun_pcb *pcb = NULL; + + if (interface == NULL) { + return (FALSE); + } + + pcb = ifnet_softc(interface); + + if (pcb == NULL) { + return (FALSE); + } + + return (pcb->utun_needs_netagent == true); +} static errno_t utun_ifnet_set_attrs(ifnet_t ifp) diff --git a/bsd/net/if_utun.h b/bsd/net/if_utun.h index 4e9868a83..460fcdb62 100644 --- a/bsd/net/if_utun.h +++ b/bsd/net/if_utun.h @@ -37,6 +37,7 @@ void* utun_alloc(size_t size); void utun_free(void *ptr); errno_t utun_register_control(void); +boolean_t utun_interface_needs_netagent(ifnet_t interface); #endif diff --git a/bsd/net/if_var.h b/bsd/net/if_var.h index 835a4535b..379b792ad 100644 --- a/bsd/net/if_var.h +++ b/bsd/net/if_var.h @@ -688,6 +688,7 @@ struct if_data_internal { u_int64_t ifi_fpackets; /* forwarded packets on interface */ u_int64_t ifi_fbytes; /* forwarded bytes on interface */ struct timeval ifi_lastchange; /* time of last administrative change */ + struct timeval ifi_lastupdown; /* time of last up/down event */ u_int32_t ifi_hwassist; /* HW offload capabilities */ u_int32_t 
ifi_tso_v4_mtu; /* TCP Segment Offload IPv4 maximum segment size */ u_int32_t ifi_tso_v6_mtu; /* TCP Segment Offload IPv6 maximum segment size */ @@ -726,6 +727,7 @@ struct if_data_internal { #define if_dt_bytes if_data.ifi_dt_bytes #define if_fpackets if_data.ifi_fpackets #define if_fbytes if_data.ifi_fbytes +#define if_lastupdown if_data.ifi_lastupdown #endif /* BSD_KERNEL_PRIVATE */ #ifdef BSD_KERNEL_PRIVATE @@ -1503,6 +1505,7 @@ extern struct ifaddr *ifa_ifwithroute_locked(int, const struct sockaddr *, const struct sockaddr *); extern struct ifaddr *ifa_ifwithroute_scoped_locked(int, const struct sockaddr *, const struct sockaddr *, unsigned int); +extern struct ifaddr *ifaof_ifpforaddr_select(const struct sockaddr *, struct ifnet *); extern struct ifaddr *ifaof_ifpforaddr(const struct sockaddr *, struct ifnet *); __private_extern__ struct ifaddr *ifa_ifpgetprimary(struct ifnet *, int); extern void ifa_addref(struct ifaddr *, int); diff --git a/bsd/net/kpi_interface.c b/bsd/net/kpi_interface.c index c5f268213..2109e94cb 100644 --- a/bsd/net/kpi_interface.c +++ b/bsd/net/kpi_interface.c @@ -1703,6 +1703,35 @@ ifnet_lastchange(ifnet_t interface, struct timeval *last_change) return (0); } +errno_t +ifnet_touch_lastupdown(ifnet_t interface) +{ + if (interface == NULL) { + return (EINVAL); + } + + TOUCHLASTCHANGE(&interface->if_lastupdown); + + return (0); +} + +errno_t +ifnet_updown_delta(ifnet_t interface, struct timeval *updown_delta) +{ + if (interface == NULL) { + return (EINVAL); + } + + /* Calculate the delta */ + updown_delta->tv_sec = net_uptime(); + if (updown_delta->tv_sec > interface->if_data.ifi_lastupdown.tv_sec) { + updown_delta->tv_sec -= interface->if_data.ifi_lastupdown.tv_sec; + } + updown_delta->tv_usec = 0; + + return (0); +} + errno_t ifnet_get_address_list(ifnet_t interface, ifaddr_t **addresses) { @@ -2441,7 +2470,7 @@ ifaddr_findbestforaddr(const struct sockaddr *addr, ifnet_t interface) if (addr == NULL || interface == NULL) return (NULL); 
- return (ifaof_ifpforaddr(addr, interface)); + return (ifaof_ifpforaddr_select(addr, interface)); } errno_t diff --git a/bsd/net/kpi_interface.h b/bsd/net/kpi_interface.h index 15b7fd09f..0dd25f44c 100644 --- a/bsd/net/kpi_interface.h +++ b/bsd/net/kpi_interface.h @@ -3556,6 +3556,23 @@ extern errno_t ifnet_get_buffer_status(const ifnet_t interface, */ extern void ifnet_normalise_unsent_data(void); +/*! + @function ifnet_touch_lastupdown + @discussion Updates the lastupdown value to now. + @param interface The interface. + @result 0 on success, otherwise the errno error. + */ +extern errno_t ifnet_touch_lastupdown(ifnet_t interface); + +/*! + @function ifnet_updown_delta + @discussion Retrieves the difference between lastupdown and now. + @param interface The interface. + @param updown_delta A timeval struct to copy the delta between lastupdown and now + to. + */ +extern errno_t ifnet_updown_delta(ifnet_t interface, struct timeval *updown_delta); + #endif /* KERNEL_PRIVATE */ __END_DECLS diff --git a/bsd/net/necp.h b/bsd/net/necp.h index d20124f67..d4f42f386 100644 --- a/bsd/net/necp.h +++ b/bsd/net/necp.h @@ -502,6 +502,7 @@ typedef struct necp_cache_buffer { #define NECP_CLIENT_RESULT_TFO_COOKIE 13 // NECP_TFO_COOKIE_LEN_MAX #define NECP_CLIENT_RESULT_TFO_FLAGS 14 // u_int8_t #define NECP_CLIENT_RESULT_RECOMMENDED_MSS 15 // u_int8_t +#define NECP_CLIENT_RESULT_INTERFACE_TIME_DELTA 17 // u_int32_t, seconds since interface up/down #define NECP_CLIENT_RESULT_NEXUS_INSTANCE 100 // uuid_t #define NECP_CLIENT_RESULT_NEXUS_PORT 101 // u_int16_t diff --git a/bsd/net/necp_client.c b/bsd/net/necp_client.c index 9341db09b..41e6efaa8 100644 --- a/bsd/net/necp_client.c +++ b/bsd/net/necp_client.c @@ -177,6 +177,7 @@ SYSCTL_INT(_net_necp, NECPCTL_IF_FLOW_COUNT, if_flow_count, CTLFLAG_LOCKED | CTL SYSCTL_INT(_net_necp, NECPCTL_OBSERVER_MESSAGE_LIMIT, observer_message_limit, CTLFLAG_LOCKED | CTLFLAG_RW, &necp_observer_message_limit, 256, ""); #define NECP_MAX_CLIENT_LIST_SIZE 
1024 * 1024 // 1MB +#define NECP_MAX_AGENT_ACTION_SIZE 256 extern int tvtohz(struct timeval *); extern unsigned int get_maxmtu(struct rtentry *); @@ -2347,6 +2348,16 @@ necp_update_client_result(proc_t proc, interface_struct.generation = ifnet_get_generation(direct_interface); cursor = necp_buffer_write_tlv_if_different(cursor, NECP_CLIENT_RESULT_INTERFACE, sizeof(interface_struct), &interface_struct, &updated, client->result, sizeof(client->result)); + + // Set the delta time since interface up/down + struct timeval updown_delta = {}; + if (ifnet_updown_delta(direct_interface, &updown_delta) == 0) { + u_int32_t delta = updown_delta.tv_sec; + bool ignore_updated = FALSE; + cursor = necp_buffer_write_tlv_if_different(cursor, NECP_CLIENT_RESULT_INTERFACE_TIME_DELTA, + sizeof(delta), &delta, &ignore_updated, + client->result, sizeof(client->result)); + } } if (delegate_interface != NULL) { struct necp_client_result_interface interface_struct; @@ -4069,6 +4080,12 @@ necp_client_agent_action(struct necp_fd_data *fd_data, struct necp_client_action goto done; } + if (uap->buffer_size > NECP_MAX_AGENT_ACTION_SIZE) { + NECPLOG(LOG_ERR, "necp_client_agent_action invalid buffer size (>%u)", NECP_MAX_AGENT_ACTION_SIZE); + error = EINVAL; + goto done; + } + if ((parameters = _MALLOC(uap->buffer_size, M_NECP, M_WAITOK | M_ZERO)) == NULL) { NECPLOG0(LOG_ERR, "necp_client_agent_action malloc failed"); error = ENOMEM; diff --git a/bsd/net/packet_mangler.c b/bsd/net/packet_mangler.c index 2efa88cc2..a09e7c74c 100644 --- a/bsd/net/packet_mangler.c +++ b/bsd/net/packet_mangler.c @@ -838,9 +838,7 @@ static errno_t pktmnglr_ipfilter_input(void *cookie, mbuf_t *data, int offset, u struct packet_mangler *p_pkt_mnglr = (struct packet_mangler *)cookie; struct ip ip; struct tcphdr tcp; - char tcp_opt_buf[TCP_MAX_OPTLEN] = {0}; - int orig_tcp_optlen; - int tcp_optlen = 0; + int ip_pld_len; errno_t error = 0; if (p_pkt_mnglr == NULL) { @@ -902,6 +900,8 @@ static errno_t 
pktmnglr_ipfilter_input(void *cookie, mbuf_t *data, int offset, u goto input_done; } + ip_pld_len = ntohs(ip.ip_len) - (ip.ip_hl << 2); + if (protocol != p_pkt_mnglr->proto) { PKT_MNGLR_LOG(LOG_INFO, "Skip: Protocol mismatch"); goto input_done; @@ -909,6 +909,11 @@ static errno_t pktmnglr_ipfilter_input(void *cookie, mbuf_t *data, int offset, u switch (protocol) { case IPPROTO_TCP: + if (ip_pld_len < (int) sizeof(tcp)) { + PKT_MNGLR_LOG(LOG_ERR, "IP total len not big enough for TCP: %d", ip_pld_len); + goto drop_it; + } + error = mbuf_copydata(*data, offset, sizeof(tcp), &tcp); if (error) { PKT_MNGLR_LOG(LOG_ERR, "Could not make local TCP header copy"); @@ -942,15 +947,28 @@ static errno_t pktmnglr_ipfilter_input(void *cookie, mbuf_t *data, int offset, u switch (protocol) { case IPPROTO_TCP: if (p_pkt_mnglr->proto_action_mask) { - int i = 0; - tcp_optlen = (tcp.th_off << 2)-sizeof(struct tcphdr); + char tcp_opt_buf[TCP_MAX_OPTLEN] = {0}; + int orig_tcp_optlen; + int tcp_optlen = 0; + int i = 0, off; + + off = (tcp.th_off << 2); + + if (off < (int) sizeof(struct tcphdr) || off > ip_pld_len) { + PKT_MNGLR_LOG(LOG_ERR, "TCP header offset is wrong: %d", off); + goto drop_it; + } + + + tcp_optlen = off - sizeof(struct tcphdr); + PKT_MNGLR_LOG(LOG_INFO, "Packet from F5 is TCP\n"); PKT_MNGLR_LOG(LOG_INFO, "Optlen: %d\n", tcp_optlen); orig_tcp_optlen = tcp_optlen; if (orig_tcp_optlen) { error = mbuf_copydata(*data, offset+sizeof(struct tcphdr), orig_tcp_optlen, tcp_opt_buf); if (error) { - PKT_MNGLR_LOG(LOG_ERR, "Failed to copy tcp options"); + PKT_MNGLR_LOG(LOG_ERR, "Failed to copy tcp options: error %d offset %d optlen %d", error, offset, orig_tcp_optlen); goto input_done; } } @@ -963,6 +981,12 @@ static errno_t pktmnglr_ipfilter_input(void *cookie, mbuf_t *data, int offset, u continue; } else if ((tcp_opt_buf[i] != 0) && (tcp_opt_buf[i] != TCP_OPT_MULTIPATH_TCP)) { PKT_MNGLR_LOG(LOG_INFO, "Skipping option %x\n", tcp_opt_buf[i]); + + /* Minimum TCP option size is 2 */ + 
if (tcp_opt_buf[i+1] < 2) { + PKT_MNGLR_LOG(LOG_ERR, "Received suspicious TCP option"); + goto drop_it; + } tcp_optlen -= tcp_opt_buf[i+1]; i += tcp_opt_buf[i+1]; continue; @@ -997,14 +1021,18 @@ static errno_t pktmnglr_ipfilter_input(void *cookie, mbuf_t *data, int offset, u i++; } } - error = mbuf_copyback(*data, - offset+sizeof(struct tcphdr), - orig_tcp_optlen, tcp_opt_buf, MBUF_WAITOK); - - if (error) { - PKT_MNGLR_LOG(LOG_ERR, - "Failed to copy tcp options"); - goto input_done; + + if (orig_tcp_optlen) { + error = mbuf_copyback(*data, + offset+sizeof(struct tcphdr), + orig_tcp_optlen, tcp_opt_buf, MBUF_WAITOK); + + if (error) { + PKT_MNGLR_LOG(LOG_ERR, + "Failed to copy tcp options back: error %d offset %d optlen %d", + error, offset, orig_tcp_optlen); + goto input_done; + } } } break; diff --git a/bsd/net/route.c b/bsd/net/route.c index 69fdd5937..1eda0fa30 100644 --- a/bsd/net/route.c +++ b/bsd/net/route.c @@ -1926,6 +1926,9 @@ rtrequest_common_locked(int req, struct sockaddr *dst0, * necp client watchers to re-evaluate */ if (SA_DEFAULT(rt_key(rt))) { + if (rt->rt_ifp != NULL) { + ifnet_touch_lastupdown(rt->rt_ifp); + } necp_update_all_clients(); } #endif /* NECP */ @@ -2235,6 +2238,9 @@ rtrequest_common_locked(int req, struct sockaddr *dst0, * necp client watchers to re-evaluate */ if (SA_DEFAULT(rt_key(rt))) { + if (rt->rt_ifp != NULL) { + ifnet_touch_lastupdown(rt->rt_ifp); + } necp_update_all_clients(); } #endif /* NECP */ diff --git a/bsd/netinet/in_tclass.c b/bsd/netinet/in_tclass.c index 21fc03f44..ff40a2872 100644 --- a/bsd/netinet/in_tclass.c +++ b/bsd/netinet/in_tclass.c @@ -117,6 +117,10 @@ int net_qos_policy_wifi_enabled = 0; SYSCTL_INT(_net_qos_policy, OID_AUTO, wifi_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_wifi_enabled, 0, ""); +int net_qos_policy_none_wifi_enabled = 0; +SYSCTL_INT(_net_qos_policy, OID_AUTO, none_wifi_enabled, + CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_none_wifi_enabled, 0, ""); + int 
net_qos_policy_capable_enabled = 0; SYSCTL_INT(_net_qos_policy, OID_AUTO, capable_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &net_qos_policy_capable_enabled, 0, ""); diff --git a/bsd/netinet/in_tclass.h b/bsd/netinet/in_tclass.h index d42713162..a62203024 100644 --- a/bsd/netinet/in_tclass.h +++ b/bsd/netinet/in_tclass.h @@ -91,6 +91,7 @@ extern int net_qos_guideline(struct net_qos_param *param, size_t param_len); extern int net_qos_policy_restricted; extern int net_qos_policy_wifi_enabled; +extern int net_qos_policy_none_wifi_enabled; extern int net_qos_policy_capable_enabled; extern void net_qos_map_init(void); diff --git a/bsd/netinet/mptcp_subr.c b/bsd/netinet/mptcp_subr.c index a4a61beb2..226637cfa 100644 --- a/bsd/netinet/mptcp_subr.c +++ b/bsd/netinet/mptcp_subr.c @@ -1312,7 +1312,7 @@ mptcp_subflow_socreate(struct mptses *mpte, struct mptsub *mpts, int dom, mpte_lock(mpte); if (error) { mptcplog((LOG_ERR, "%s: subflow socreate mp_so 0x%llx unable to create subflow socket error %d\n", - (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), error), + __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), error), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); proc_rele(p); diff --git a/bsd/netinet/mptcp_usrreq.c b/bsd/netinet/mptcp_usrreq.c index 8749d1a0a..14b623411 100644 --- a/bsd/netinet/mptcp_usrreq.c +++ b/bsd/netinet/mptcp_usrreq.c @@ -319,6 +319,11 @@ mptcp_usr_connectx(struct socket *mp_so, struct sockaddr *src, goto out; } + if (dst->sa_family != AF_INET && dst->sa_family != AF_INET6) { + error = EAFNOSUPPORT; + goto out; + } + if (dst->sa_family == AF_INET && dst->sa_len != sizeof(mpte->__mpte_dst_v4)) { mptcplog((LOG_ERR, "%s IPv4 dst len %u\n", __func__, @@ -351,6 +356,11 @@ mptcp_usr_connectx(struct socket *mp_so, struct sockaddr *src, } if (src) { + if (src->sa_family != AF_INET && src->sa_family != AF_INET6) { + error = EAFNOSUPPORT; + goto out; + } + if (src->sa_family == AF_INET && src->sa_len != sizeof(mpte->__mpte_src_v4)) { mptcplog((LOG_ERR, "%s IPv4 src len %u\n", __func__, 
diff --git a/bsd/netinet6/in6_src.c b/bsd/netinet6/in6_src.c index 42fe27ad4..ab987d2db 100644 --- a/bsd/netinet6/in6_src.c +++ b/bsd/netinet6/in6_src.c @@ -217,6 +217,58 @@ do { \ goto out; /* XXX: we can't use 'break' here */ \ } while (0) + +struct ifaddr * +in6_selectsrc_core_ifa(struct sockaddr_in6 *addr, struct ifnet *ifp, int srcsel_debug) { + int err = 0; + struct ifnet *src_ifp = NULL; + struct in6_addr src_storage = {}; + struct in6_addr *in6 = NULL; + struct ifaddr *ifa = NULL; + + if((in6 = in6_selectsrc_core(addr, + (ip6_prefer_tempaddr ? IPV6_SRCSEL_HINT_PREFER_TMPADDR : 0), + ifp, 0, &src_storage, &src_ifp, &err, &ifa)) == NULL) { + if (err == 0) + err = EADDRNOTAVAIL; + VERIFY(src_ifp == NULL); + if (ifa != NULL) { + IFA_REMREF(ifa); + ifa = NULL; + } + goto done; + } + + if (src_ifp != ifp) { + if (err == 0) + err = ENETUNREACH; + if (ifa != NULL) { + IFA_REMREF(ifa); + ifa = NULL; + } + goto done; + } + + VERIFY(ifa != NULL); + ifnet_lock_shared(ifp); + if ((ifa->ifa_debug & IFD_DETACHING) != 0) { + err = EHOSTUNREACH; + ifnet_lock_done(ifp); + if (ifa != NULL) { + IFA_REMREF(ifa); + ifa = NULL; + } + goto done; + } + ifnet_lock_done(ifp); + +done: + SASEL_LOG("Returned with error: %d", err); + if (src_ifp != NULL) + ifnet_release(src_ifp); + return (ifa); +} + struct in6_addr * in6_selectsrc_core(struct sockaddr_in6 *dstsock, uint32_t hint_mask, struct ifnet *ifp, int srcsel_debug, struct in6_addr *src_storage, diff --git a/bsd/netinet6/ip6_var.h b/bsd/netinet6/ip6_var.h index 153ad9dc4..67fcd97bc 100644 --- a/bsd/netinet6/ip6_var.h +++ b/bsd/netinet6/ip6_var.h @@ -549,6 +549,7 @@ extern int dest6_input(struct mbuf **, int *, int); */ #define IPV6_SRCSEL_HINT_PREFER_TMPADDR 0x00000001 +extern struct ifaddr * in6_selectsrc_core_ifa(struct sockaddr_in6 *, struct ifnet *, int); extern struct in6_addr * in6_selectsrc_core(struct sockaddr_in6 *, uint32_t, struct ifnet *, int, struct in6_addr *, struct ifnet **, int *, struct ifaddr **); diff --git 
a/bsd/vfs/vfs_attrlist.c b/bsd/vfs/vfs_attrlist.c index f5710e862..cde828a7b 100644 --- a/bsd/vfs/vfs_attrlist.c +++ b/bsd/vfs/vfs_attrlist.c @@ -1142,6 +1142,27 @@ getvolattrlist(vfs_context_t ctx, vnode_t vp, struct attrlist *alp, VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: buffer size too large (%d limit %d)", ab.allocated, ATTR_MAX_BUFFER); goto out; } + + if (return_valid && + (ab.allocated < (ssize_t)(sizeof(uint32_t) + sizeof(attribute_set_t))) && + !(options & FSOPT_REPORT_FULLSIZE)) { + uint32_t num_bytes_valid = sizeof(uint32_t); + /* + * Not enough to return anything and we don't have to report + * how much space is needed. Get out now. + * N.B. - We have only been called after having verified that + * attributeBuffer is at least sizeof(uint32_t); + */ + if (UIO_SEG_IS_USER_SPACE(segflg)) { + error = copyout(&num_bytes_valid, + CAST_USER_ADDR_T(attributeBuffer), num_bytes_valid); + } else { + bcopy(&num_bytes_valid, (void *)attributeBuffer, + (size_t)num_bytes_valid); + } + goto out; + } + MALLOC(ab.base, char *, ab.allocated, M_TEMP, M_ZERO | M_WAITOK); if (ab.base == NULL) { error = ENOMEM; @@ -1457,9 +1478,10 @@ getvolattrlist(vfs_context_t ctx, vnode_t vp, struct attrlist *alp, * they gave us, so they can always check for truncation themselves. */ *(uint32_t *)ab.base = (options & FSOPT_REPORT_FULLSIZE) ? ab.needed : imin(ab.allocated, ab.needed); - + /* Return attribute set output if requested. 
*/ - if (return_valid) { + if (return_valid && + (ab.allocated >= (ssize_t)(sizeof(uint32_t) + sizeof(ab.actual)))) { ab.actual.commonattr |= ATTR_CMN_RETURNED_ATTRS; if (pack_invalid) { /* Only report the attributes that are valid */ @@ -2775,6 +2797,9 @@ getattrlist_internal(vfs_context_t ctx, vnode_t vp, struct attrlist *alp, // must be true for fork attributes to be used as new common attributes const int use_fork = (options & FSOPT_ATTR_CMN_EXTENDED) != 0; + if (bufferSize < sizeof(uint32_t)) + return (ERANGE); + proc_is64 = proc_is64bit(vfs_context_proc(ctx)); if (segflg == UIO_USERSPACE) { diff --git a/bsd/vm/vm_unix.c b/bsd/vm/vm_unix.c index e94cb60c8..df5f607ae 100644 --- a/bsd/vm/vm_unix.c +++ b/bsd/vm/vm_unix.c @@ -2290,6 +2290,10 @@ extern uint32_t vm_page_pages; SYSCTL_UINT(_vm, OID_AUTO, pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_pages, 0, ""); #if (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG) +extern int pacified_footprint_suspend; +int footprint_suspend_allowed = 0; +SYSCTL_INT(_vm, OID_AUTO, footprint_suspend_allowed, CTLFLAG_RW | CTLFLAG_LOCKED, &footprint_suspend_allowed, 0, ""); + extern void pmap_footprint_suspend(vm_map_t map, boolean_t suspend); static int sysctl_vm_footprint_suspend SYSCTL_HANDLER_ARGS @@ -2305,7 +2309,20 @@ sysctl_vm_footprint_suspend SYSCTL_HANDLER_ARGS if (error) { return error; } + if (pacified_footprint_suspend && + !footprint_suspend_allowed) { + if (new_value != 0) { + /* suspends are not allowed... */ + return 0; + } + /* ... but let resumes proceed */ + } + DTRACE_VM2(footprint_suspend, + vm_map_t, current_map(), + int, new_value); + pmap_footprint_suspend(current_map(), new_value); + return 0; } SYSCTL_PROC(_vm, OID_AUTO, footprint_suspend, diff --git a/config/MasterVersion b/config/MasterVersion index 0996d29e7..47d54a4cd 100644 --- a/config/MasterVersion +++ b/config/MasterVersion @@ -1,4 +1,4 @@ -17.5.0 +17.6.0 # The first line of this file contains the master version number for the kernel. 
# All other instances of the kernel version in xnu are derived from this file. diff --git a/iokit/Kernel/IOHibernateIO.cpp b/iokit/Kernel/IOHibernateIO.cpp index efe918ad4..6e535f66b 100644 --- a/iokit/Kernel/IOHibernateIO.cpp +++ b/iokit/Kernel/IOHibernateIO.cpp @@ -1890,7 +1890,7 @@ hibernate_write_image(void) if (kUnwiredEncrypt == pageType) { // start unwired image - if (kIOHibernateModeEncrypt & gIOHibernateMode) + if (!vars->hwEncrypt && (kIOHibernateModeEncrypt & gIOHibernateMode)) { vars->fileVars->encryptStart = (vars->fileVars->position & ~(((uint64_t)AES_BLOCK_SIZE) - 1)); vars->fileVars->encryptEnd = UINT64_MAX; @@ -2388,6 +2388,7 @@ hibernate_machine_init(void) &vars->volumeCryptKey[0], vars->volumeCryptKeySize); HIBLOG("IOPolledFilePollersSetEncryptionKey(%x)\n", err); if (kIOReturnSuccess != err) panic("IOPolledFilePollersSetEncryptionKey(0x%x)", err); + cryptvars = 0; } IOPolledFileSeek(vars->fileVars, gIOHibernateCurrentHeader->image1Size); diff --git a/iokit/Kernel/IOMemoryDescriptor.cpp b/iokit/Kernel/IOMemoryDescriptor.cpp index 9dfb2f322..0d03e32ca 100644 --- a/iokit/Kernel/IOMemoryDescriptor.cpp +++ b/iokit/Kernel/IOMemoryDescriptor.cpp @@ -567,7 +567,14 @@ IOGeneralMemoryDescriptor::memoryReferenceCreate( { // IOBufferMemoryDescriptor alloc - set flags for entry + object create prot |= MAP_MEM_NAMED_CREATE; - if (kIOMemoryBufferPurgeable & _flags) prot |= (MAP_MEM_PURGABLE | MAP_MEM_PURGABLE_KERNEL_ONLY); + if (kIOMemoryBufferPurgeable & _flags) + { + prot |= (MAP_MEM_PURGABLE | MAP_MEM_PURGABLE_KERNEL_ONLY); + if (VM_KERN_MEMORY_SKYWALK == tag) + { + prot |= MAP_MEM_LEDGER_TAG_NETWORK; + } + } if (kIOMemoryUseReserve & _flags) prot |= MAP_MEM_GRAB_SECLUDED; prot |= VM_PROT_WRITE; @@ -604,7 +611,7 @@ IOGeneralMemoryDescriptor::memoryReferenceCreate( else prot &= ~MAP_MEM_NAMED_REUSE; } - err = mach_make_memory_entry_64(map, + err = mach_make_memory_entry_internal(map, &actualSize, entryAddr, prot, &entry, cloneEntry); if (KERN_SUCCESS != err) 
break; diff --git a/iokit/Kernel/IOPolledInterface.cpp b/iokit/Kernel/IOPolledInterface.cpp index 780d64cd8..28256a35c 100644 --- a/iokit/Kernel/IOPolledInterface.cpp +++ b/iokit/Kernel/IOPolledInterface.cpp @@ -511,12 +511,14 @@ IOGetVolumeCryptKey(dev_t block_dev, OSString ** pKeyUUID, OSString * keyStoreUUID = 0; uuid_t volumeKeyUUID; aks_volume_key_t vek; + size_t callerKeySize; static IOService * sKeyStore; part = IOCopyMediaForDev(block_dev); if (!part) return (kIOReturnNotFound); + callerKeySize = *keySize; // Try APFS first { uuid_t volUuid = {0}; @@ -562,7 +564,7 @@ IOGetVolumeCryptKey(dev_t block_dev, OSString ** pKeyUUID, IOLog("volume key err 0x%x\n", err); else { - if (vek.key.keybytecount < *keySize) *keySize = vek.key.keybytecount; + if (vek.key.keybytecount <= callerKeySize) *keySize = vek.key.keybytecount; bcopy(&vek.key.keybytes[0], volumeCryptKey, *keySize); } bzero(&vek, sizeof(vek)); diff --git a/osfmk/arm64/locore.s b/osfmk/arm64/locore.s index 4430d3b5c..376e90195 100644 --- a/osfmk/arm64/locore.s +++ b/osfmk/arm64/locore.s @@ -400,6 +400,7 @@ Lel1_sp1_serror_vector_long: b fleh_dispatch64 .macro EL0_64_VECTOR + mov x18, xzr // Zero x18 to avoid leaking data to user SS stp x0, x1, [sp, #-16]! 
// Save x0 and x1 to the exception stack mrs x0, TPIDR_EL1 // Load the thread register mrs x1, SP_EL0 // Load the user stack pointer diff --git a/osfmk/kdp/kdp_udp.c b/osfmk/kdp/kdp_udp.c index 5ab42e1af..1a84b2bfc 100644 --- a/osfmk/kdp/kdp_udp.c +++ b/osfmk/kdp/kdp_udp.c @@ -1363,7 +1363,7 @@ kdp_debugger_loop( if (kdp_flag & REBOOT_POST_CORE && dumped_kernel_core()) kdp_machine_reboot(); } else { - if ((kdp_flag & PANIC_CORE_ON_NMI) && panic_active() + if ((kdp_flag & PANIC_CORE_ON_NMI) && !panic_active() && !kdp.is_conn) { disableConsoleOutput = FALSE; diff --git a/osfmk/kern/startup.c b/osfmk/kern/startup.c index 7685299bd..c9cfd5167 100644 --- a/osfmk/kern/startup.c +++ b/osfmk/kern/startup.c @@ -628,6 +628,7 @@ kernel_bootstrap_thread(void) + /* * Start the user bootstrap. */ diff --git a/osfmk/mach/memory_object_types.h b/osfmk/mach/memory_object_types.h index 399657d05..2a5def180 100644 --- a/osfmk/mach/memory_object_types.h +++ b/osfmk/mach/memory_object_types.h @@ -386,6 +386,7 @@ typedef struct memory_object_attr_info memory_object_attr_info_data_t; & 0xFF000000) | ((flags) & 0xFFFFFF)); /* leave room for vm_prot bits (0xFF ?) */ +#define MAP_MEM_LEDGER_TAG_NETWORK 0x002000 /* charge to "network" ledger */ #define MAP_MEM_PURGABLE_KERNEL_ONLY 0x004000 /* volatility controlled by kernel */ #define MAP_MEM_GRAB_SECLUDED 0x008000 /* can grab secluded pages */ #define MAP_MEM_ONLY 0x010000 /* change processor caching */ @@ -409,6 +410,9 @@ typedef struct memory_object_attr_info memory_object_attr_info_data_t; MAP_MEM_VM_COPY | \ MAP_MEM_VM_SHARE | \ MAP_MEM_4K_DATA_ADDR) +#define MAP_MEM_FLAGS_ALL ( \ + MAP_MEM_LEDGER_TAG_NETWORK | \ + MAP_MEM_FLAGS_USER) #ifdef KERNEL diff --git a/osfmk/vm/vm_init.c b/osfmk/vm/vm_init.c index e4944b459..326ac01a5 100644 --- a/osfmk/vm/vm_init.c +++ b/osfmk/vm/vm_init.c @@ -113,12 +113,31 @@ vm_mem_bootstrap_log(const char *message) * This is done only by the first cpu up. 
*/ +int pacified_footprint_suspend = 0; +int pacified_purgeable_iokit = 0; + void vm_mem_bootstrap(void) { vm_offset_t start, end; vm_size_t zsizearg; mach_vm_size_t zsize; + int pacified; + + pacified = 0; + PE_parse_boot_argn("pacified", + &pacified, + sizeof (pacified)); + if (pacified) { + pacified_footprint_suspend = 1; + pacified_purgeable_iokit = 1; + } + PE_parse_boot_argn("pacified_footprint_suspend", + &pacified_footprint_suspend, + sizeof (pacified_footprint_suspend)); + PE_parse_boot_argn("pacified_purgeable_iokit", + &pacified_purgeable_iokit, + sizeof (pacified_purgeable_iokit)); /* * Initializes resident memory structures. diff --git a/osfmk/vm/vm_map.c b/osfmk/vm/vm_map.c index f6a8e5e5c..573c2a34e 100644 --- a/osfmk/vm/vm_map.c +++ b/osfmk/vm/vm_map.c @@ -16356,7 +16356,7 @@ vm_map_page_range_info_internal( } else if (pmap_disp & (PMAP_QUERY_PAGE_ALTACCT | PMAP_QUERY_PAGE_COMPRESSED_ALTACCT)) { /* alternate accounting */ - assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry); +// assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry); pmap_disp = 0; } else { if (pmap_disp & PMAP_QUERY_PAGE_PRESENT) { diff --git a/osfmk/vm/vm_protos.h b/osfmk/vm/vm_protos.h index 54781267b..8044286f5 100644 --- a/osfmk/vm/vm_protos.h +++ b/osfmk/vm/vm_protos.h @@ -589,6 +589,14 @@ extern void memory_object_mark_eligible_for_secluded( #define MAX_PAGE_RANGE_QUERY (1ULL * 1024 * 1024 * 1024) /* 1 GB */ +extern kern_return_t mach_make_memory_entry_internal( + vm_map_t target_map, + memory_object_size_t *size, + memory_object_offset_t offset, + vm_prot_t permission, + ipc_port_t *object_handle, + ipc_port_t parent_handle); + #ifdef __cplusplus } #endif diff --git a/osfmk/vm/vm_purgeable.c b/osfmk/vm/vm_purgeable.c index 84abed964..4606d9591 100644 --- a/osfmk/vm/vm_purgeable.c +++ b/osfmk/vm/vm_purgeable.c @@ -1627,6 +1627,10 @@ vm_purgeable_accounting( task_ledgers.phys_footprint, 
ptoa(wired_page_count)); + /* no more accounting for this dead object */ + if (! task_objq_locked) { + task_objq_lock(owner); + } if (!disown_on_the_fly && (object->purgeable_queue_type == PURGEABLE_Q_TYPE_MAX)) { @@ -1639,20 +1643,15 @@ vm_purgeable_accounting( /* on a volatile queue */ vm_purgeable_volatile_owner_update(owner, -1); } - /* no more accounting for this dead object */ - owner = object->vo_purgeable_owner; - if (! task_objq_locked) { - task_objq_lock(owner); - } task_objq_lock_assert_owned(owner); queue_remove(&owner->task_objq, object, vm_object_t, task_objq); - if (! task_objq_locked) { - task_objq_unlock(owner); - } object->vo_purgeable_owner = NULL; #if DEBUG object->vo_purgeable_volatilizer = NULL; #endif /* DEBUG */ + if (! task_objq_locked) { + task_objq_unlock(owner); + } return; } @@ -1697,25 +1696,25 @@ vm_purgeable_accounting( task_ledgers.phys_footprint, ptoa(wired_page_count)); + /* no more accounting for this dead object */ + if (! task_objq_locked) { + task_objq_lock(owner); + } /* one less "non-volatile" object for the owner */ if (!disown_on_the_fly) { assert(object->purgeable_queue_type == PURGEABLE_Q_TYPE_MAX); } vm_purgeable_nonvolatile_owner_update(owner, -1); - /* no more accounting for this dead object */ - if (! task_objq_locked) { - task_objq_lock(owner); - } task_objq_lock_assert_owned(owner); queue_remove(&owner->task_objq, object, vm_object_t, task_objq); - if (! task_objq_locked) { - task_objq_unlock(owner); - } object->vo_purgeable_owner = NULL; #if DEBUG object->vo_purgeable_volatilizer = NULL; #endif /* DEBUG */ + if (! 
task_objq_locked) { + task_objq_unlock(owner); + } return; } /* more volatile bytes in ledger */ diff --git a/osfmk/vm/vm_user.c b/osfmk/vm/vm_user.c index f199dd4fe..a1e2c51c3 100644 --- a/osfmk/vm/vm_user.c +++ b/osfmk/vm/vm_user.c @@ -2192,6 +2192,32 @@ mach_make_memory_entry_64( vm_prot_t permission, ipc_port_t *object_handle, ipc_port_t parent_handle) +{ + if ((permission & MAP_MEM_FLAGS_MASK) & ~MAP_MEM_FLAGS_USER) { + /* + * Unknown flag: reject for forward compatibility. + */ + return KERN_INVALID_VALUE; + } + + return mach_make_memory_entry_internal(target_map, + size, + offset, + permission, + object_handle, + parent_handle); +} + +extern int pacified_purgeable_iokit; + +kern_return_t +mach_make_memory_entry_internal( + vm_map_t target_map, + memory_object_size_t *size, + memory_object_offset_t offset, + vm_prot_t permission, + ipc_port_t *object_handle, + ipc_port_t parent_handle) { vm_map_version_t version; vm_named_entry_t parent_entry; @@ -2234,7 +2260,7 @@ mach_make_memory_entry_64( boolean_t use_data_addr; boolean_t use_4K_compat; - if ((permission & MAP_MEM_FLAGS_MASK) & ~MAP_MEM_FLAGS_USER) { + if ((permission & MAP_MEM_FLAGS_MASK) & ~MAP_MEM_FLAGS_ALL) { /* * Unknown flag: reject for forward compatibility. 
*/ @@ -2344,12 +2370,22 @@ mach_make_memory_entry_64( assert(object->resident_page_count == 0); assert(object->wired_page_count == 0); vm_object_lock(object); - if (object->purgeable_only_by_kernel) { - vm_purgeable_nonvolatile_enqueue(object, - kernel_task); + if (pacified_purgeable_iokit) { + if (permission & MAP_MEM_LEDGER_TAG_NETWORK) { + vm_purgeable_nonvolatile_enqueue(object, + kernel_task); + } else { + vm_purgeable_nonvolatile_enqueue(object, + current_task()); + } } else { - vm_purgeable_nonvolatile_enqueue(object, - current_task()); + if (object->purgeable_only_by_kernel) { + vm_purgeable_nonvolatile_enqueue(object, + kernel_task); + } else { + vm_purgeable_nonvolatile_enqueue(object, + current_task()); + } } vm_object_unlock(object); } diff --git a/osfmk/x86_64/idt64.s b/osfmk/x86_64/idt64.s index 0996137ba..54a43ec98 100644 --- a/osfmk/x86_64/idt64.s +++ b/osfmk/x86_64/idt64.s @@ -156,11 +156,53 @@ Entry(idt64_page_fault) push $(T_PAGE_FAULT) jmp L_dispatch +/* + * #DB handler, which runs on IST1, will treat as spurious any #DB received while executing in the + * kernel while not on the kernel's gsbase. + */ Entry(idt64_debug) + /* Synthesize common interrupt stack frame */ push $0 /* error code */ pushq $(HNDL_ALLTRAPS) pushq $(T_DEBUG) - jmp L_dispatch + /* Spill prior to RDMSR */ + push %rax + push %rcx + push %rdx + mov $(MSR_IA32_GS_BASE), %ecx + rdmsr /* Check contents of GSBASE MSR */ + test $0x80000000, %edx /* MSB set? Already swapped to kernel's */ + jnz 1f + + /* + * If we're not already swapped to the kernel's gsbase AND this #DB originated from kernel space, + * it must have happened within the very small window on entry or exit before or after (respectively) + * swapgs occurred. In those cases, consider the #DB spurious and immediately return. 
+ */ + testb $3, 8+8+8+ISF64_CS(%rsp) + jnz 2f + pop %rdx + pop %rcx + pop %rax + addq $0x18, %rsp /* Remove synthesized interrupt stack frame */ + jmp EXT(ret64_iret) +2: + swapgs /* direct from user */ +1: + pop %rdx + + leaq EXT(idt64_hndl_table0)(%rip), %rax + mov 16(%rax), %rax /* Offset of per-CPU shadow */ + mov %gs:CPU_TASK_CR3(%rax), %rax + mov %rax, %cr3 + + pop %rcx + + /* Note that %rax will be popped from the stack in ks_dispatch, below */ + + leaq EXT(idt64_hndl_table0)(%rip), %rax + jmp *(%rax) + /* * Legacy interrupt gate System call handlers. * These are entered via a syscall interrupt. The system call number in %rax diff --git a/osfmk/x86_64/idt_table.h b/osfmk/x86_64/idt_table.h index 10d590278..639516a56 100644 --- a/osfmk/x86_64/idt_table.h +++ b/osfmk/x86_64/idt_table.h @@ -27,7 +27,7 @@ */ TRAP(0x00,idt64_zero_div) -TRAP_SPC(0x01,idt64_debug) +TRAP_IST1(0x01,idt64_debug) TRAP_IST2(0x02,idt64_nmi) USER_TRAP(0x03,idt64_int3) USER_TRAP(0x04,idt64_into) diff --git a/tools/lldbmacros/pmap.py b/tools/lldbmacros/pmap.py index 9b4f711e4..6f9bbb6ec 100755 --- a/tools/lldbmacros/pmap.py +++ b/tools/lldbmacros/pmap.py @@ -118,7 +118,7 @@ def KDPWritePhysMEM(address, intval, bits): if not WriteInt32ToMemoryAddress(0, input_address): return False - kdp_pkt_size = GetType('kdp_writephysmem64_req_t').GetByteSize() + kdp_pkt_size = GetType('kdp_writephysmem64_req_t').GetByteSize() + (bits / 8) if not WriteInt32ToMemoryAddress(kdp_pkt_size, len_address): return False diff --git a/tools/tests/darwintests/net_tuntests.c b/tools/tests/darwintests/net_tuntests.c index 6b5fd97b8..91363ab63 100644 --- a/tools/tests/darwintests/net_tuntests.c +++ b/tools/tests/darwintests/net_tuntests.c @@ -314,26 +314,6 @@ create_sa(const char ifname[IFXNAMSIZ], uint8_t type, uint32_t spi, struct in_ad T_QUIET; T_EXPECT_EQ(slen, (ssize_t)sizeof(addcmd), NULL); } -/* Unfortunately, connect can return EBUSY due to: - * Always return EBUSY if interface with the same name is in delayed 
detach even if the unique ID is different. - * - * We should fix that so we don't return EBUSY when we aren't - * requesting a specific interface name, but until then workaround it - * in the test. - */ - -static int -try_connect(int socket, const struct sockaddr *address, socklen_t address_len) -{ - int ret; - while (1) { - ret = connect(socket, address, address_len); - if (ret != -1 || errno != EBUSY) - return ret; - sleep(1); - } -} - static int create_tunsock(int enable_netif, int enable_flowswitch, int enable_channel) { @@ -343,6 +323,8 @@ create_tunsock(int enable_netif, int enable_flowswitch, int enable_channel) uuid_t uuid; socklen_t uuidlen; +startover: + T_QUIET; T_EXPECT_POSIX_SUCCESS(tunsock = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL), NULL); memset(&kernctl_info, 0, sizeof(kernctl_info)); @@ -387,7 +369,17 @@ create_tunsock(int enable_netif, int enable_flowswitch, int enable_channel) T_QUIET; T_EXPECT_EQ_ULONG((unsigned long )uuidlen, sizeof(uuid_t), NULL); T_QUIET; T_EXPECT_TRUE(uuid_is_null(uuid), NULL); - T_QUIET; T_WITH_ERRNO; T_EXPECT_POSIX_ZERO(try_connect(tunsock, (struct sockaddr *)&kernctl_addr, sizeof(kernctl_addr)), NULL); + int error = connect(tunsock, (struct sockaddr *)&kernctl_addr, sizeof(kernctl_addr)); + if (error == -1 && errno == EBUSY) { + /* XXX remove this retry nonsense when this is fixed: + * creating an interface without specifying specific interface name should not return EBUSY + */ + close(tunsock); + T_LOG("connect got EBUSY, sleeping 1 second before retry"); + sleep(1); + goto startover; + } + T_QUIET; T_WITH_ERRNO; T_EXPECT_POSIX_ZERO(error, "connect()"); T_QUIET; T_EXPECT_POSIX_FAILURE(setsockopt(tunsock, SYSPROTO_CONTROL, g_OPT_ENABLE_NETIF, &enable_netif, sizeof(enable_netif)), EINVAL, NULL);