diff --git a/bsd/kern/bsd_init.c b/bsd/kern/bsd_init.c index 25395d352..22a932314 100644 --- a/bsd/kern/bsd_init.c +++ b/bsd/kern/bsd_init.c @@ -94,6 +94,7 @@ #include #include #include +#include #include @@ -711,6 +712,9 @@ bsd_init(void) panic("bsd_init: Failed to allocate bsd pageable map"); } + bsd_init_kprintf("calling fpxlog_init\n"); + fpxlog_init(); + /* * Initialize buffers and hash links for buffers * diff --git a/bsd/kern/bsd_stubs.c b/bsd/kern/bsd_stubs.c index 7883a1b75..1867151e9 100644 --- a/bsd/kern/bsd_stubs.c +++ b/bsd/kern/bsd_stubs.c @@ -45,7 +45,7 @@ /* XXX these should be in a common header somwhere, but aren't */ extern int chrtoblk_set(int, int); -extern vm_offset_t kmem_mb_alloc(vm_map_t, int, int); +extern vm_offset_t kmem_mb_alloc(vm_map_t, int, int, kern_return_t *); /* XXX most of these just exist to export; there's no good header for them*/ void pcb_synch(void); @@ -58,7 +58,7 @@ lck_grp_t * devsw_lock_grp; int dmmin, dmmax, dmtext; vm_offset_t -kmem_mb_alloc(vm_map_t mbmap, int size, int physContig) +kmem_mb_alloc(vm_map_t mbmap, int size, int physContig, kern_return_t *err) { vm_offset_t addr = 0; kern_return_t kr = KERN_SUCCESS; @@ -70,6 +70,8 @@ kmem_mb_alloc(vm_map_t mbmap, int size, int physContig) if (kr != KERN_SUCCESS) addr = 0; + if (err) + *err = kr; return addr; } diff --git a/bsd/kern/kern_asl.c b/bsd/kern/kern_asl.c index 3d97f8113..3972cda2d 100644 --- a/bsd/kern/kern_asl.c +++ b/bsd/kern/kern_asl.c @@ -190,6 +190,13 @@ kern_asl_msg_va(int level, const char *facility, int num_pairs, va_list vargs, . /* Print the key-value pairs in ASL format */ vaddlog(fmt, vargs); + /* + * Note: can't use os_log_with_args() here because 'fmt' is + * constructed on the stack i.e. doesn't come from a text + * section. More importantly, the newer logging system + * doesn't grok ASL either. 
+ */ + return (err); } diff --git a/bsd/kern/kern_exec.c b/bsd/kern/kern_exec.c index 9412a4472..df43f2013 100644 --- a/bsd/kern/kern_exec.c +++ b/bsd/kern/kern_exec.c @@ -3115,7 +3115,7 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) if (error) { DTRACE_PROC1(exec__failure, int, error); } else { - DTRACE_PROC(exec__success); + dtrace_thread_didexec(imgp->ip_new_thread); } } @@ -3124,8 +3124,7 @@ posix_spawn(proc_t ap, struct posix_spawn_args *uap, int32_t *retval) } #endif /* - * exec-success dtrace probe fired, clear bsd_info from - * old task if it did exec. + * clear bsd_info from old task if it did exec. */ if (task_did_exec(current_task())) { set_bsdtask_info(current_task(), NULL); @@ -3577,9 +3576,10 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval) } #endif /* CONFIG_MACF */ - DTRACE_PROC(exec__success); #if CONFIG_DTRACE + dtrace_thread_didexec(imgp->ip_new_thread); + if ((dtrace_proc_waitfor_hook = dtrace_proc_waitfor_exec_ptr) != NULL) (*dtrace_proc_waitfor_hook)(p); #endif @@ -3594,8 +3594,7 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval) exit_with_error: /* - * exec-success dtrace probe fired, clear bsd_info from - * old task if it did exec. + * clear bsd_info from old task if it did exec. 
*/ if (task_did_exec(current_task())) { set_bsdtask_info(current_task(), NULL); diff --git a/bsd/kern/kern_ntptime.c b/bsd/kern/kern_ntptime.c index a922c3676..ccbfece8d 100644 --- a/bsd/kern/kern_ntptime.c +++ b/bsd/kern/kern_ntptime.c @@ -80,6 +80,7 @@ #include #endif #include +#include typedef int64_t l_fp; #define L_ADD(v, u) ((v) += (u)) @@ -216,6 +217,11 @@ static void ntp_loop_update_call(void); static void refresh_ntp_loop(void); static void start_ntp_loop(void); +#if DEVELOPMENT || DEBUG +uint32_t g_should_log_clock_adjustments = 0; +SYSCTL_INT(_kern, OID_AUTO, log_clock_adjustments, CTLFLAG_RW | CTLFLAG_LOCKED, &g_should_log_clock_adjustments, 0, "enable kernel clock adjustment logging"); +#endif + static bool ntp_is_time_error(int tsl) { @@ -326,6 +332,12 @@ ntp_adjtime(struct proc *p, struct ntp_adjtime_args *uap, __unused int32_t *retv if (error) return (error); +#if DEVELOPMENT || DEBUG + if (g_should_log_clock_adjustments) { + os_log(OS_LOG_DEFAULT, "%s:BEFORE modes %u offset %ld freq %ld status %d constant %ld time_adjtime %lld\n", + __func__, ntv.modes, ntv.offset, ntv.freq, ntv.status, ntv.constant, time_adjtime); + } +#endif /* * Update selected clock variables - only the superuser can * change anything. Note that there is no error checking here on @@ -415,6 +427,13 @@ ntp_adjtime(struct proc *p, struct ntp_adjtime_args *uap, __unused int32_t *retv ret = ntp_is_time_error(time_status) ? TIME_ERROR : time_state; +#if DEVELOPMENT || DEBUG + if (g_should_log_clock_adjustments) { + os_log(OS_LOG_DEFAULT, "%s:AFTER offset %lld freq %lld status %d constant %ld time_adjtime %lld\n", + __func__, time_offset, time_freq, time_status, time_constant, time_adjtime); + } +#endif + /* * Retrieve all clock variables. 
Note that the TAI offset is * returned only by ntp_gettime(); @@ -535,6 +554,18 @@ ntp_update_second(int64_t *adjustment, clock_sec_t secs) updated = 0; } +#if DEVELOPMENT || DEBUG + if (g_should_log_clock_adjustments) { + int64_t nano = (time_adj > 0)? time_adj >> 32 : -((-time_adj) >> 32); + int64_t frac = (time_adj > 0)? ((uint32_t) time_adj) : -((int64_t)(uint32_t) (-time_adj)); /* widen before negating so the sign survives */ + + os_log(OS_LOG_DEFAULT, "%s:AFTER offset %lld (%lld) freq %lld status %d " + "constant %ld time_adjtime %lld nano %lld frac %lld adj %lld\n", + __func__, time_offset, (time_offset > 0)? time_offset >> 32 : -((-time_offset) >> 32), + time_freq, time_status, time_constant, time_adjtime, nano, frac, time_adj); + } +#endif + *adjustment = time_adj; } @@ -622,6 +653,12 @@ kern_adjtime(struct timeval *delta) NTP_LOCK(enable); ltr = time_adjtime; time_adjtime = ltw; +#if DEVELOPMENT || DEBUG + if (g_should_log_clock_adjustments) { + os_log(OS_LOG_DEFAULT, "%s:AFTER offset %lld freq %lld status %d constant %ld time_adjtime %lld\n", + __func__, time_offset, time_freq, time_status, time_constant, time_adjtime); + } +#endif NTP_UNLOCK(enable); atv.tv_sec = ltr / (int64_t)USEC_PER_SEC; diff --git a/bsd/kern/subr_log.c b/bsd/kern/subr_log.c index 5a748b5b5..cde2de0be 100644 --- a/bsd/kern/subr_log.c +++ b/bsd/kern/subr_log.c @@ -100,7 +100,7 @@ #include /* XXX should be in a common header somewhere */ -extern void logwakeup(void); +extern void logwakeup(struct msgbuf *); extern void oslogwakeup(void); extern void oslog_streamwakeup(void); static void oslog_streamwakeup_locked(void); @@ -117,17 +117,21 @@ extern uint32_t oslog_s_error_count; #define LOG_ASYNC 0x04 #define LOG_RDWAIT 0x08 -#define MAX_UNREAD_CHARS (CONFIG_MSG_BSIZE/2) /* All globals should be accessed under LOG_LOCK() */ +static char amsg_bufc[1024]; +static struct msgbuf aslbuf = {MSG_MAGIC, sizeof (amsg_bufc), 0, 0, amsg_bufc}; +struct msgbuf *aslbufp __attribute__((used)) = &aslbuf; + /* logsoftc only valid while log_open=1 */ struct 
logsoftc { int sc_state; /* see above for possibilities */ struct selinfo sc_selp; /* thread waiting for select */ int sc_pgid; /* process/group for async I/O */ + struct msgbuf *sc_mbp; } logsoftc; -int log_open; /* also used in log() */ +static int log_open; char smsg_bufc[CONFIG_MSG_BSIZE]; /* static buffer */ char oslog_stream_bufc[FIREHOSE_CHUNK_SIZE]; /* static buffer */ struct firehose_chunk_s oslog_boot_buf = { @@ -140,8 +144,8 @@ struct firehose_chunk_s oslog_boot_buf = { }, }; /* static buffer */ firehose_chunk_t firehose_boot_chunk = &oslog_boot_buf; -struct msgbuf msgbuf = {MSG_MAGIC,sizeof(smsg_bufc),0,0,smsg_bufc}; -struct msgbuf oslog_stream_buf = {MSG_MAGIC,0,0,0,NULL}; +struct msgbuf msgbuf = {MSG_MAGIC, sizeof(smsg_bufc), 0, 0, smsg_bufc}; +struct msgbuf oslog_stream_buf = {MSG_MAGIC, 0, 0, 0, NULL}; struct msgbuf *msgbufp __attribute__((used)) = &msgbuf; struct msgbuf *oslog_streambufp __attribute__((used)) = &oslog_stream_buf; @@ -168,7 +172,7 @@ struct oslog_streamsoftc { int sc_state; /* see above for possibilities */ struct selinfo sc_selp; /* thread waiting for select */ int sc_pgid; /* process/group for async I/O */ -}oslog_streamsoftc; +} oslog_streamsoftc; STAILQ_HEAD(, oslog_stream_buf_entry_s) oslog_stream_free_head = STAILQ_HEAD_INITIALIZER(oslog_stream_free_head); @@ -233,9 +237,7 @@ static void oslog_streamwrite_append_bytes(const char *buffer, int buflen); #endif static int sysctl_kern_msgbuf(struct sysctl_oid *oidp, - void *arg1, - int arg2, - struct sysctl_req *req); + void *arg1, int arg2, struct sysctl_req *req); /*ARGSUSED*/ int @@ -246,6 +248,16 @@ logopen(__unused dev_t dev, __unused int flags, __unused int mode, struct proc * LOG_UNLOCK(); return (EBUSY); } + if (atm_get_diagnostic_config() & ATM_ENABLE_LEGACY_LOGGING) { + logsoftc.sc_mbp = msgbufp; + } else { + /* + * Support for messagetracer (kern_asl_msg()) + * In this mode, /dev/klog exports only ASL-formatted messages + * written into aslbufp via vaddlog(). 
+ */ + logsoftc.sc_mbp = aslbufp; + } logsoftc.sc_pgid = p->p_pid; /* signal process only */ log_open = 1; @@ -408,9 +420,10 @@ logread(__unused dev_t dev, struct uio *uio, int flag) { int l; int error = 0; + struct msgbuf *mbp = logsoftc.sc_mbp; LOG_LOCK(); - while (msgbufp->msg_bufr == msgbufp->msg_bufx) { + while (mbp->msg_bufr == mbp->msg_bufx) { if (flag & IO_NDELAY) { error = EWOULDBLOCK; goto out; @@ -425,7 +438,7 @@ logread(__unused dev_t dev, struct uio *uio, int flag) * If the wakeup is missed * then wait for 5 sec and reevaluate */ - if ((error = tsleep((caddr_t)msgbufp, LOG_RDPRI | PCATCH, + if ((error = tsleep((caddr_t)mbp, LOG_RDPRI | PCATCH, "klog", 5 * hz)) != 0) { /* if it times out; ignore */ if (error != EWOULDBLOCK) @@ -438,23 +451,22 @@ logread(__unused dev_t dev, struct uio *uio, int flag) while (uio_resid(uio) > 0) { int readpos; - l = msgbufp->msg_bufx - msgbufp->msg_bufr; + l = mbp->msg_bufx - mbp->msg_bufr; if (l < 0) - l = msgbufp->msg_size - msgbufp->msg_bufr; + l = mbp->msg_size - mbp->msg_bufr; l = min(l, uio_resid(uio)); if (l == 0) break; - readpos = msgbufp->msg_bufr; + readpos = mbp->msg_bufr; LOG_UNLOCK(); - error = uiomove((caddr_t)&msgbufp->msg_bufc[readpos], - l, uio); + error = uiomove((caddr_t)&mbp->msg_bufc[readpos], l, uio); LOG_LOCK(); if (error) break; - msgbufp->msg_bufr = readpos + l; - if (msgbufp->msg_bufr >= msgbufp->msg_size) - msgbufp->msg_bufr = 0; + mbp->msg_bufr = readpos + l; + if (mbp->msg_bufr >= mbp->msg_size) + mbp->msg_bufr = 0; } out: LOG_UNLOCK(); @@ -588,11 +600,13 @@ oslog_streamread(__unused dev_t dev, struct uio *uio, int flag) int logselect(__unused dev_t dev, int rw, void * wql, struct proc *p) { + const struct msgbuf *mbp = logsoftc.sc_mbp; + switch (rw) { case FREAD: LOG_LOCK(); - if (msgbufp->msg_bufr != msgbufp->msg_bufx) { + if (mbp->msg_bufr != mbp->msg_bufx) { LOG_UNLOCK(); return (1); } @@ -643,10 +657,8 @@ oslog_streamselect(__unused dev_t dev, int rw, void * wql, struct proc *p) } void 
-logwakeup(void) +logwakeup(struct msgbuf *mbp) { - int pgid; - /* cf. r24974766 & r25201228*/ if (oslog_is_safe() == FALSE) { return; @@ -657,9 +669,13 @@ logwakeup(void) LOG_UNLOCK(); return; } + if (NULL == mbp) + mbp = logsoftc.sc_mbp; + if (mbp != logsoftc.sc_mbp) + goto out; selwakeup(&logsoftc.sc_selp); if (logsoftc.sc_state & LOG_ASYNC) { - pgid = logsoftc.sc_pgid; + int pgid = logsoftc.sc_pgid; LOG_UNLOCK(); if (pgid < 0) gsignal(-pgid, SIGIO); @@ -668,9 +684,10 @@ logwakeup(void) LOG_LOCK(); } if (logsoftc.sc_state & LOG_RDWAIT) { - wakeup((caddr_t)msgbufp); + wakeup((caddr_t)mbp); logsoftc.sc_state &= ~LOG_RDWAIT; } +out: LOG_UNLOCK(); } @@ -719,15 +736,16 @@ int logioctl(__unused dev_t dev, u_long com, caddr_t data, __unused int flag, __unused struct proc *p) { int l; + const struct msgbuf *mbp = logsoftc.sc_mbp; LOG_LOCK(); switch (com) { /* return number of characters immediately available */ case FIONREAD: - l = msgbufp->msg_bufx - msgbufp->msg_bufr; + l = mbp->msg_bufx - mbp->msg_bufr; if (l < 0) - l += msgbufp->msg_size; + l += mbp->msg_size; *(off_t *)data = l; break; @@ -892,13 +910,10 @@ oslog_init(void) * SMP reentrancy. */ void -log_putc_locked(char c) +log_putc_locked(struct msgbuf *mbp, char c) { - struct msgbuf *mbp; - - mbp = msgbufp; mbp->msg_bufc[mbp->msg_bufx++] = c; - if (mbp->msg_bufx >= msgbufp->msg_size) + if (mbp->msg_bufx >= mbp->msg_size) mbp->msg_bufx = 0; } @@ -953,7 +968,8 @@ oslog_streamwrite_metadata_locked(oslog_stream_buf_entry_t m_entry) return; } -static void oslog_streamwrite_append_bytes(const char *buffer, int buflen) +static void +oslog_streamwrite_append_bytes(const char *buffer, int buflen) { struct msgbuf *mbp; @@ -1080,7 +1096,7 @@ oslog_streamwrite_locked(firehose_tracepoint_id_u ftid, * * Returns: (void) * - * Notes: This function is used for syingle byte output to the log. It + * Notes: This function is used for single byte output to the log. It * primarily exists to maintain binary backward compatibility. 
*/ void @@ -1088,14 +1104,14 @@ log_putc(char c) { int unread_count = 0; LOG_LOCK(); - log_putc_locked(c); + log_putc_locked(msgbufp, c); unread_count = msgbufp->msg_bufx - msgbufp->msg_bufr; LOG_UNLOCK(); if (unread_count < 0) unread_count = 0 - unread_count; - if (c == '\n' || unread_count >= MAX_UNREAD_CHARS) - logwakeup(); + if (c == '\n' || unread_count >= (msgbufp->msg_size / 2)) + logwakeup(msgbufp); } @@ -1109,7 +1125,8 @@ log_putc(char c) * memory is dynamically allocated. Memory management must already be up. */ int -log_setsize(int size) { +log_setsize(int size) +{ char *new_logdata; int new_logsize, new_bufr, new_bufx; char *old_logdata; @@ -1210,12 +1227,13 @@ void oslog_setsize(int size) printf("oslog_setsize: new buffer size = %d, new num entries= %d\n", oslog_stream_buf_size, oslog_stream_num_entries); } -SYSCTL_PROC(_kern, OID_AUTO, msgbuf, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0, sysctl_kern_msgbuf, "I", ""); +SYSCTL_PROC(_kern, OID_AUTO, msgbuf, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0, + sysctl_kern_msgbuf, "I", ""); -static int sysctl_kern_msgbuf(struct sysctl_oid *oidp __unused, - void *arg1 __unused, - int arg2 __unused, - struct sysctl_req *req) +static int +sysctl_kern_msgbuf(struct sysctl_oid *oidp __unused, + void *arg1 __unused, int arg2 __unused, struct sysctl_req *req) { int old_bufsize, bufsize; int error; @@ -1241,7 +1259,8 @@ static int sysctl_kern_msgbuf(struct sysctl_oid *oidp __unused, * It returns as much data still in the buffer as possible. 
*/ int -log_dmesg(user_addr_t buffer, uint32_t buffersize, int32_t * retval) { +log_dmesg(user_addr_t buffer, uint32_t buffersize, int32_t * retval) +{ uint32_t i; uint32_t localbuff_size; int error = 0, newl, skip; diff --git a/bsd/kern/subr_prf.c b/bsd/kern/subr_prf.c index f254816fa..840fd1e75 100644 --- a/bsd/kern/subr_prf.c +++ b/bsd/kern/subr_prf.c @@ -136,9 +136,8 @@ extern int __doprnt(const char *fmt, /* * Record cpu that panic'd and lock around panic data */ -static void printn(uint32_t n, int b, int flags, struct tty *ttyp, int zf, int fld_size); -extern void logwakeup(void); +extern void logwakeup(struct msgbuf *); extern void halt_cpu(void); static void @@ -218,8 +217,6 @@ tprintf(tpr_t tpr, const char *fmt, ...) va_list ap; struct putchar_args pca; - logpri(LOG_INFO); - if (sess && (tp = SESSION_TP(sess)) != TTY_NULL) { /* ttycheckoutq(), tputchar() require a locked tp */ tty_lock(tp); @@ -235,12 +232,12 @@ tprintf(tpr_t tpr, const char *fmt, ...) } pca.flags = TOLOG; - pca.tty = TTY_NULL; + pca.tty = TTY_NULL; va_start(ap, fmt); __doprnt(fmt, ap, putchar, &pca, 10, TRUE); va_end(ap); - logwakeup(); + logwakeup(msgbufp); va_start(ap, fmt); os_log_with_args(OS_LOG_DEFAULT, OS_LOG_TYPE_DEFAULT, fmt, ap, __builtin_return_address(0)); @@ -272,55 +269,39 @@ ttyprintf(struct tty *tp, const char *fmt, ...) } } - -extern int log_open; - - void -logpri(int level) +logtime(time_t secs) { - struct putchar_args pca; - pca.flags = TOLOG; - pca.tty = NULL; - - putchar('<', &pca); - printn((uint32_t)level, 10, TOLOG, (struct tty *)0, 0, 0); - putchar('>', &pca); + printf("Time %ld Message ", secs); } static void -_logtime(const char *fmt, ...) 
+putchar_asl(int c, void *arg) { - va_list ap; - va_start(ap, fmt); - vaddlog(fmt, ap); - va_end(ap); -} + struct putchar_args *pca = arg; -void -logtime(time_t secs) -{ - _logtime(" 0 [Time %ld] [Message ", secs); + if ((pca->flags & TOLOGLOCKED) && c != '\0' && c != '\r' && c != 0177) + log_putc_locked(aslbufp, c); + putchar(c, arg); } +/* + * Vestigial support for kern_asl_msg() via /dev/klog + */ int vaddlog(const char *fmt, va_list ap) { - struct putchar_args pca; - - pca.flags = TOLOGLOCKED; - pca.tty = NULL; - - if (!log_open) { - pca.flags |= TOCONS; - } + struct putchar_args pca = { + .flags = TOLOGLOCKED, + .tty = NULL, + }; bsd_log_lock(); - __doprnt(fmt, ap, putchar, &pca, 10, TRUE); + __doprnt(fmt, ap, putchar_asl, &pca, 10, TRUE); bsd_log_unlock(); - - logwakeup(); - return 0; + logwakeup(NULL); + + return (0); } void @@ -356,43 +337,6 @@ prf(const char *fmt, va_list ap, int flags, struct tty *ttyp) return 0; } -/* - * Printn prints a number n in base b. - * We don't use recursion to avoid deep kernel stacks. - */ -static void -printn(uint32_t n, int b, int flags, struct tty *ttyp, int zf, int fld_size) -{ - char prbuf[11]; - char *cp; - struct putchar_args pca; - - pca.flags = flags; - pca.tty = ttyp; - - if (b == 10 && (int)n < 0) { - putchar('-', &pca); - n = (unsigned)(-(int)n); - } - cp = prbuf; - do { - *cp++ = "0123456789abcdef"[n%b]; - n /= b; - } while (n); - if (fld_size) { - for (fld_size -= cp - prbuf; fld_size > 0; fld_size--) - if (zf) - putchar('0', &pca); - else - putchar(' ', &pca); - } - do - putchar(*--cp, &pca); - while (cp > prbuf); -} - - - /* * Warn that a system table is full. 
*/ @@ -428,7 +372,7 @@ putchar(int c, void *arg) if ((pca->flags & TOLOG) && c != '\0' && c != '\r' && c != 0177) log_putc(c); if ((pca->flags & TOLOGLOCKED) && c != '\0' && c != '\r' && c != 0177) - log_putc_locked(c); + log_putc_locked(msgbufp, c); if ((pca->flags & TOCONS) && constty == 0 && c != '\0') (*v_putc)(c); if (pca->flags & TOSTR) { diff --git a/bsd/kern/uipc_mbuf.c b/bsd/kern/uipc_mbuf.c index 04df5a515..57915610c 100644 --- a/bsd/kern/uipc_mbuf.c +++ b/bsd/kern/uipc_mbuf.c @@ -303,10 +303,27 @@ /* TODO: should be in header file */ /* kernel translater */ -extern vm_offset_t kmem_mb_alloc(vm_map_t, int, int); +extern vm_offset_t kmem_mb_alloc(vm_map_t, int, int, kern_return_t *); extern ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va); extern vm_map_t mb_map; /* special map */ +static uint32_t mb_kmem_contig_failed; +static uint32_t mb_kmem_failed; +static uint32_t mb_kmem_one_failed; +/* Timestamp of allocation failures. */ +static uint64_t mb_kmem_contig_failed_ts; +static uint64_t mb_kmem_failed_ts; +static uint64_t mb_kmem_one_failed_ts; +static uint64_t mb_kmem_contig_failed_size; +static uint64_t mb_kmem_failed_size; +static uint32_t mb_kmem_stats[6]; +static const char *mb_kmem_stats_labels[] = { "INVALID_ARGUMENT", + "INVALID_ADDRESS", + "RESOURCE_SHORTAGE", + "NO_SPACE", + "KERN_FAILURE", + "OTHERS" }; + /* Global lock */ decl_lck_mtx_data(static, mbuf_mlock_data); static lck_mtx_t *mbuf_mlock = &mbuf_mlock_data; @@ -315,7 +332,16 @@ static lck_grp_t *mbuf_mlock_grp; static lck_grp_attr_t *mbuf_mlock_grp_attr; /* Back-end (common) layer */ +static uint64_t mb_expand_cnt; +static uint64_t mb_expand_cl_cnt; +static uint64_t mb_expand_cl_total; +static uint64_t mb_expand_bigcl_cnt; +static uint64_t mb_expand_bigcl_total; +static uint64_t mb_expand_16kcl_cnt; +static uint64_t mb_expand_16kcl_total; static boolean_t mbuf_worker_needs_wakeup; /* wait channel for mbuf worker */ +static uint32_t mbuf_worker_run_cnt; +static uint64_t 
mbuf_worker_last_runtime; static int mbuf_worker_ready; /* worker thread is runnable */ static int ncpu; /* number of CPUs */ static ppnum_t *mcl_paddr; /* Array of cluster physical addresses */ @@ -655,7 +681,7 @@ boolean_t mb_peak_firstreport = FALSE; static struct timeval mb_wdtstart; /* watchdog start timestamp */ static char *mbuf_dump_buf; -#define MBUF_DUMP_BUF_SIZE 2048 +#define MBUF_DUMP_BUF_SIZE 3072 /* * mbuf watchdog is enabled by default on embedded platforms. It is @@ -1886,6 +1912,20 @@ slab_free(mbuf_class_t class, mcache_obj_t *buf) VERIFY(class != MC_16KCL || njcl > 0); VERIFY(buf->obj_next == NULL); + /* + * Synchronizing with m_clalloc, as it reads m_total, while we here + * are modifying m_total. + */ + while (mb_clalloc_busy) { + mb_clalloc_waiters++; + (void) msleep(mb_clalloc_waitchan, mbuf_mlock, + (PZERO-1), "m_clalloc", NULL); + LCK_MTX_ASSERT(mbuf_mlock, LCK_MTX_ASSERT_OWNED); + } + + /* We are busy now; tell everyone else to go away */ + mb_clalloc_busy = TRUE; + sp = slab_get(buf); VERIFY(sp->sl_class == class && slab_inrange(sp, buf) && (sp->sl_flags & (SLF_MAPPED | SLF_PARTIAL)) == SLF_MAPPED); @@ -2076,6 +2116,13 @@ slab_free(mbuf_class_t class, mcache_obj_t *buf) /* Reinsert the slab to the class's slab list */ if (slab_is_detached(sp)) slab_insert(sp, class); + + /* We're done; let others enter */ + mb_clalloc_busy = FALSE; + if (mb_clalloc_waiters > 0) { + mb_clalloc_waiters = 0; + wakeup(mb_clalloc_waitchan); + } } /* @@ -2781,6 +2828,42 @@ mbuf_cslab_audit(void *arg, mcache_obj_t *list, boolean_t alloc) } } +static void +m_vm_error_stats(uint32_t *cnt, uint64_t *ts, uint64_t *size, + uint64_t alloc_size, kern_return_t error) +{ + + *cnt = *cnt + 1; + *ts = net_uptime(); + if (size) { + *size = alloc_size; + } + _CASSERT(sizeof(mb_kmem_stats) / sizeof(mb_kmem_stats[0]) == + sizeof(mb_kmem_stats_labels) / sizeof(mb_kmem_stats_labels[0])); + switch (error) { + case KERN_SUCCESS: + break; + case KERN_INVALID_ARGUMENT: + 
mb_kmem_stats[0]++; + break; + case KERN_INVALID_ADDRESS: + mb_kmem_stats[1]++; + break; + case KERN_RESOURCE_SHORTAGE: + mb_kmem_stats[2]++; + break; + case KERN_NO_SPACE: + mb_kmem_stats[3]++; + break; + case KERN_FAILURE: + mb_kmem_stats[4]++; + break; + default: + mb_kmem_stats[5]++; + break; + } +} + /* * Allocate some number of mbuf clusters and place on cluster freelist. */ @@ -2795,6 +2878,7 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize) mcache_obj_t *con_list = NULL; mcl_slab_t *sp; mbuf_class_t class; + kern_return_t error; /* Set if a buffer allocation needs allocation of multiple pages */ large_buffer = ((bufsize == m_maxsize(MC_16KCL)) && @@ -2841,24 +2925,41 @@ m_clalloc(const u_int32_t num, const int wait, const u_int32_t bufsize) lck_mtx_unlock(mbuf_mlock); size = round_page(i * bufsize); - page = kmem_mb_alloc(mb_map, size, large_buffer); + page = kmem_mb_alloc(mb_map, size, large_buffer, &error); /* * If we did ask for "n" 16KB physically contiguous chunks * and didn't get them, then please try again without this * restriction. 
*/ - if (large_buffer && page == 0) - page = kmem_mb_alloc(mb_map, size, 0); + net_update_uptime(); + if (large_buffer && page == 0) { + m_vm_error_stats(&mb_kmem_contig_failed, + &mb_kmem_contig_failed_ts, + &mb_kmem_contig_failed_size, + size, error); + page = kmem_mb_alloc(mb_map, size, 0, &error); + } if (page == 0) { + m_vm_error_stats(&mb_kmem_failed, + &mb_kmem_failed_ts, + &mb_kmem_failed_size, + size, error); +#if PAGE_SIZE == 4096 if (bufsize == m_maxsize(MC_BIGCL)) { +#else + if (bufsize >= m_maxsize(MC_BIGCL)) { +#endif /* Try for 1 page if failed */ size = PAGE_SIZE; - page = kmem_mb_alloc(mb_map, size, 0); + page = kmem_mb_alloc(mb_map, size, 0, &error); } if (page == 0) { + m_vm_error_stats(&mb_kmem_one_failed, + &mb_kmem_one_failed_ts, + NULL, size, error); lck_mtx_lock(mbuf_mlock); goto out; } @@ -6408,7 +6509,7 @@ mbuf_sleep(mbuf_class_t class, unsigned int num, int wait) mbuf_watchdog(); mb_waiters++; - m_region_expand(class) += num; + m_region_expand(class) += m_total(class) + num; /* wake up the worker thread */ if (class > MC_MBUF && mbuf_worker_ready && mbuf_worker_needs_wakeup) { @@ -6441,15 +6542,19 @@ mbuf_worker_thread(void) while (1) { lck_mtx_lock(mbuf_mlock); + mbuf_worker_run_cnt++; mbuf_expand = 0; if (m_region_expand(MC_CL) > 0) { int n; - + mb_expand_cl_cnt++; /* Adjust to current number of cluster in use */ n = m_region_expand(MC_CL) - (m_total(MC_CL) - m_infree(MC_CL)); if ((n + m_total(MC_CL)) > m_maxlimit(MC_CL)) n = m_maxlimit(MC_CL) - m_total(MC_CL); + if (n > 0) { + mb_expand_cl_total += n; + } m_region_expand(MC_CL) = 0; if (n > 0 && freelist_populate(MC_CL, n, M_WAIT) > 0) @@ -6457,12 +6562,15 @@ mbuf_worker_thread(void) } if (m_region_expand(MC_BIGCL) > 0) { int n; - + mb_expand_bigcl_cnt++; /* Adjust to current number of 4 KB cluster in use */ n = m_region_expand(MC_BIGCL) - (m_total(MC_BIGCL) - m_infree(MC_BIGCL)); if ((n + m_total(MC_BIGCL)) > m_maxlimit(MC_BIGCL)) n = m_maxlimit(MC_BIGCL) - m_total(MC_BIGCL); + if (n 
> 0) { + mb_expand_bigcl_total += n; + } m_region_expand(MC_BIGCL) = 0; if (n > 0 && freelist_populate(MC_BIGCL, n, M_WAIT) > 0) @@ -6470,12 +6578,15 @@ mbuf_worker_thread(void) } if (m_region_expand(MC_16KCL) > 0) { int n; - + mb_expand_16kcl_cnt++; /* Adjust to current number of 16 KB cluster in use */ n = m_region_expand(MC_16KCL) - (m_total(MC_16KCL) - m_infree(MC_16KCL)); if ((n + m_total(MC_16KCL)) > m_maxlimit(MC_16KCL)) n = m_maxlimit(MC_16KCL) - m_total(MC_16KCL); + if (n > 0) { + mb_expand_16kcl_total += n; + } m_region_expand(MC_16KCL) = 0; if (n > 0) @@ -6491,12 +6602,20 @@ mbuf_worker_thread(void) if (mbuf_expand) { while (m_total(MC_MBUF) < (m_total(MC_BIGCL) + m_total(MC_CL))) { + mb_expand_cnt++; if (freelist_populate(MC_MBUF, 1, M_WAIT) == 0) break; } } mbuf_worker_needs_wakeup = TRUE; + /* + * If there's a deadlock and we're not sending / receiving + * packets, net_uptime() won't be updated. Update it here + * so we are sure it's correct. + */ + net_update_uptime(); + mbuf_worker_last_runtime = net_uptime(); assert_wait((caddr_t)&mbuf_worker_needs_wakeup, THREAD_UNINT); lck_mtx_unlock(mbuf_mlock); @@ -7256,7 +7375,8 @@ static struct mbtypes { static char * mbuf_dump(void) { - unsigned long totmem = 0, totfree = 0, totmbufs, totused, totpct; + unsigned long totmem = 0, totfree = 0, totmbufs, totused, totpct, + totreturned = 0; u_int32_t m_mbufs = 0, m_clfree = 0, m_bigclfree = 0; u_int32_t m_mbufclfree = 0, m_mbufbigclfree = 0; u_int32_t m_16kclusters = 0, m_16kclfree = 0, m_mbuf16kclfree = 0; @@ -7299,6 +7419,7 @@ mbuf_dump(void) totmem += mem; totfree += (sp->mbcl_mc_cached + sp->mbcl_infree) * sp->mbcl_size; + totreturned += sp->mbcl_release_cnt; } @@ -7363,6 +7484,52 @@ mbuf_dump(void) k = snprintf(c, clen, "%lu KB allocated to network (approx. 
%lu%% " "in use)\n", totmem / 1024, totpct); MBUF_DUMP_BUF_CHK(); + k = snprintf(c, clen, "%lu KB returned to the system\n", + totreturned / 1024); + MBUF_DUMP_BUF_CHK(); + + net_update_uptime(); + k = snprintf(c, clen, + "VM allocation failures: contiguous %u, normal %u, one page %u\n", + mb_kmem_contig_failed, mb_kmem_failed, mb_kmem_one_failed); + MBUF_DUMP_BUF_CHK(); + if (mb_kmem_contig_failed_ts || mb_kmem_failed_ts || + mb_kmem_one_failed_ts) { + k = snprintf(c, clen, + "VM allocation failure timestamps: contiguous %llu " + "(size %llu), normal %llu (size %llu), one page %llu " + "(now %llu)\n", + mb_kmem_contig_failed_ts, mb_kmem_contig_failed_size, + mb_kmem_failed_ts, mb_kmem_failed_size, + mb_kmem_one_failed_ts, net_uptime()); + MBUF_DUMP_BUF_CHK(); + k = snprintf(c, clen, + "VM return codes: "); + MBUF_DUMP_BUF_CHK(); + for (i = 0; + i < sizeof(mb_kmem_stats) / sizeof(mb_kmem_stats[0]); + i++) { + k = snprintf(c, clen, "%s: %u ", mb_kmem_stats_labels[i], + mb_kmem_stats[i]); + MBUF_DUMP_BUF_CHK(); + } + k = snprintf(c, clen, "\n"); + MBUF_DUMP_BUF_CHK(); + } + k = snprintf(c, clen, + "worker thread runs: %u, expansions: %llu, cl %llu/%llu, " + "bigcl %llu/%llu, 16k %llu/%llu\n", mbuf_worker_run_cnt, + mb_expand_cnt, mb_expand_cl_cnt, mb_expand_cl_total, + mb_expand_bigcl_cnt, mb_expand_bigcl_total, mb_expand_16kcl_cnt, + mb_expand_16kcl_total); + MBUF_DUMP_BUF_CHK(); + if (mbuf_worker_last_runtime != 0) { + k = snprintf(c, clen, "worker thread last run time: " + "%llu (%llu seconds ago)\n", + mbuf_worker_last_runtime, + net_uptime() - mbuf_worker_last_runtime); + MBUF_DUMP_BUF_CHK(); + } /* mbuf leak detection statistics */ mleak_update_stats(); @@ -7736,6 +7903,7 @@ mbuf_report_peak_usage(void) u_int64_t uptime; struct nstat_sysinfo_data ns_data; uint32_t memreleased = 0; + static uint32_t prevmemreleased; uptime = net_uptime(); lck_mtx_lock(mbuf_mlock); @@ -7762,8 +7930,9 @@ mbuf_report_peak_usage(void) for (i = 0; i < NELEM(mbuf_table); i++) { 
m_peak(m_class(i)) = m_total(m_class(i)); memreleased += m_release_cnt(i); - m_release_cnt(i) = 0; } + memreleased = memreleased - prevmemreleased; + prevmemreleased += memreleased; /* remember the cumulative total, not the delta just reported */ mb_peak_newreport = FALSE; lck_mtx_unlock(mbuf_mlock); @@ -7997,7 +8166,83 @@ m_drain_force_sysctl SYSCTL_HANDLER_ARGS return (err); } +#if DEBUG || DEVELOPMENT + +static int mbtest_val; +static int mbtest_running; + +static void mbtest_thread(__unused void *arg) +{ + int i; + + printf("%s thread starting\n", __func__); + + for (i = 0; i < 1000; i++) { + unsigned int needed = 100000; + struct mbuf *m1, *m2, *m3; + + if (njcl > 0) { + needed = 100000; + m3 = m_getpackets_internal(&needed, 0, M_DONTWAIT, 0, M16KCLBYTES); + m_freem_list(m3); + } + + needed = 100000; + m2 = m_getpackets_internal(&needed, 0, M_DONTWAIT, 0, MBIGCLBYTES); + m_freem_list(m2); + + m1 = m_getpackets_internal(&needed, 0, M_DONTWAIT, 0, MCLBYTES); + m_freem_list(m1); + } + + printf("%s thread ending\n", __func__); + + OSDecrementAtomic(&mbtest_running); + wakeup_one((caddr_t)&mbtest_running); +} + +static void sysctl_mbtest(void) +{ + /* We launch three threads - wait for all of them */ + OSIncrementAtomic(&mbtest_running); + OSIncrementAtomic(&mbtest_running); + OSIncrementAtomic(&mbtest_running); + + thread_call_func_delayed((thread_call_func_t)mbtest_thread, NULL, 10); + thread_call_func_delayed((thread_call_func_t)mbtest_thread, NULL, 10); + thread_call_func_delayed((thread_call_func_t)mbtest_thread, NULL, 10); + + while (mbtest_running) { + msleep((caddr_t)&mbtest_running, NULL, PUSER, "mbtest_running", NULL); + } +} + +static int +mbtest SYSCTL_HANDLER_ARGS +{ +#pragma unused(arg1, arg2) + int error = 0, val, oldval = mbtest_val; + + val = oldval; + error = sysctl_handle_int(oidp, &val, 0, req); + if (error || !req->newptr) + return (error); + + if (val != oldval) + sysctl_mbtest(); + + mbtest_val = val; + + return (error); +} +#endif + SYSCTL_DECL(_kern_ipc); +#if DEBUG || DEVELOPMENT +SYSCTL_PROC(_kern_ipc, 
OID_AUTO, mbtest, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &mbtest_val, 0, &mbtest, "I", + "Toggle to test mbufs"); +#endif SYSCTL_PROC(_kern_ipc, KIPC_MBSTAT, mbstat, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, mbstat_sysctl, "S,mbstat", ""); diff --git a/bsd/kern/uipc_syscalls.c b/bsd/kern/uipc_syscalls.c index 53043d70b..73c6ba447 100644 --- a/bsd/kern/uipc_syscalls.c +++ b/bsd/kern/uipc_syscalls.c @@ -822,11 +822,15 @@ connectx_nocancel(struct proc *p, struct connectx_args *uap, int *retval) if (uap->iov != USER_ADDR_NULL) { /* Verify range before calling uio_create() */ - if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV) - return (EINVAL); + if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV){ + error = EINVAL; + goto out; + } - if (uap->len == USER_ADDR_NULL) - return (EINVAL); + if (uap->len == USER_ADDR_NULL){ + error = EINVAL; + goto out; + } /* allocate a uio to hold the number of iovecs passed */ auio = uio_create(uap->iovcnt, 0, diff --git a/bsd/miscfs/devfs/devfs_vfsops.c b/bsd/miscfs/devfs/devfs_vfsops.c index 604cf1b10..db3e249e9 100644 --- a/bsd/miscfs/devfs/devfs_vfsops.c +++ b/bsd/miscfs/devfs/devfs_vfsops.c @@ -128,10 +128,8 @@ devfs_init(__unused struct vfsconf *vfsp) UID_ROOT, GID_WHEEL, 0666, "zero"); uint32_t logging_config = atm_get_diagnostic_config(); - if ( logging_config & ATM_ENABLE_LEGACY_LOGGING ) { - devfs_make_node(makedev(6, 0), DEVFS_CHAR, + devfs_make_node(makedev(6, 0), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0600, "klog"); - } if ( !(logging_config & ATM_TRACE_DISABLE) ) { devfs_make_node(makedev(7, 0), DEVFS_CHAR, diff --git a/bsd/net/if.c b/bsd/net/if.c index 244132134..53df43d54 100644 --- a/bsd/net/if.c +++ b/bsd/net/if.c @@ -2062,7 +2062,14 @@ ifnet_reset_order(u_int32_t *ordered_indices, u_int32_t count) int error = 0; ifnet_head_lock_exclusive(); - + for (u_int32_t order_index = 0; order_index < count; order_index++) { + if (ordered_indices[order_index] == IFSCOPE_NONE || + ordered_indices[order_index] > 
(uint32_t)if_index) { + error = EINVAL; + ifnet_head_done(); + return (error); + } + } // Flush current ordered list for (ifp = TAILQ_FIRST(&ifnet_ordered_head); ifp != NULL; ifp = TAILQ_FIRST(&ifnet_ordered_head)) { @@ -2075,10 +2082,6 @@ ifnet_reset_order(u_int32_t *ordered_indices, u_int32_t count) for (u_int32_t order_index = 0; order_index < count; order_index++) { u_int32_t interface_index = ordered_indices[order_index]; - if (interface_index == IFSCOPE_NONE || - interface_index > (uint32_t)if_index) { - break; - } ifp = ifindex2ifnet[interface_index]; if (ifp == NULL) { continue; @@ -2130,7 +2133,6 @@ ifioctl_iforder(u_long cmd, caddr_t data) { int error = 0; u_int32_t *ordered_indices = NULL; - if (data == NULL) { return (EINVAL); } @@ -2139,7 +2141,7 @@ ifioctl_iforder(u_long cmd, caddr_t data) case SIOCSIFORDER: { /* struct if_order */ struct if_order *ifo = (struct if_order *)(void *)data; - if (ifo->ifo_count > (u_int32_t)if_index) { + if (ifo->ifo_count == 0 || ifo->ifo_count > (u_int32_t)if_index) { error = EINVAL; break; } @@ -2162,8 +2164,22 @@ ifioctl_iforder(u_long cmd, caddr_t data) break; } } + /* ordered_indices should not contain duplicates */ + bool found_duplicate = FALSE; + for (uint32_t i = 0; i < (ifo->ifo_count - 1) && !found_duplicate ; i++){ + for (uint32_t j = i + 1; j < ifo->ifo_count && !found_duplicate ; j++){ + if (ordered_indices[j] == ordered_indices[i]){ + error = EINVAL; + found_duplicate = TRUE; + break; + } + } + } + if (found_duplicate) + break; error = ifnet_reset_order(ordered_indices, ifo->ifo_count); + break; } diff --git a/bsd/sys/kasl.h b/bsd/sys/kasl.h index c9eef57a9..a00ec9e25 100644 --- a/bsd/sys/kasl.h +++ b/bsd/sys/kasl.h @@ -43,6 +43,8 @@ kern_asl_msg_va(int level, const char *facility, int num_pairs, extern int kern_asl_msg(int level, const char *facility, int num_pairs, ...); -int escape_str(char *str, int len, int buflen); +extern int escape_str(char *str, int len, int buflen); +extern void 
fpxlog_init(void); +extern void fpxlog(int, uint32_t, uint32_t, uint32_t); #endif /* !_SYS_KASL_H_ */ diff --git a/bsd/sys/kdebug.h b/bsd/sys/kdebug.h index 0e3988617..15527d926 100644 --- a/bsd/sys/kdebug.h +++ b/bsd/sys/kdebug.h @@ -636,30 +636,31 @@ extern void kdebug_reset(void); #define DBG_IODISK DBG_IOSTORAGE /* OBSOLETE: Use DBG_IOSTORAGE instead */ /* **** The Kernel Debug Sub Classes for Device Drivers (DBG_DRIVERS) **** */ -#define DBG_DRVSTORAGE 1 /* Storage layers */ -#define DBG_DRVNETWORK 2 /* Network layers */ -#define DBG_DRVKEYBOARD 3 /* Keyboard */ -#define DBG_DRVHID 4 /* HID Devices */ -#define DBG_DRVAUDIO 5 /* Audio */ -#define DBG_DRVSERIAL 7 /* Serial */ -#define DBG_DRVSAM 8 /* SCSI Architecture Model layers */ -#define DBG_DRVPARALLELATA 9 /* Parallel ATA */ -#define DBG_DRVPARALLELSCSI 10 /* Parallel SCSI */ -#define DBG_DRVSATA 11 /* Serial ATA */ -#define DBG_DRVSAS 12 /* SAS */ -#define DBG_DRVFIBRECHANNEL 13 /* FiberChannel */ -#define DBG_DRVUSB 14 /* USB */ -#define DBG_DRVBLUETOOTH 15 /* Bluetooth */ -#define DBG_DRVFIREWIRE 16 /* FireWire */ -#define DBG_DRVINFINIBAND 17 /* Infiniband */ -#define DBG_DRVGRAPHICS 18 /* Graphics */ -#define DBG_DRVSD 19 /* Secure Digital */ -#define DBG_DRVNAND 20 /* NAND drivers and layers */ -#define DBG_SSD 21 /* SSD */ -#define DBG_DRVSPI 22 /* SPI */ -#define DBG_DRVWLAN_802_11 23 /* WLAN 802.11 */ -#define DBG_DRVSSM 24 /* System State Manager(AppleSSM) */ -#define DBG_DRVSMC 25 /* System Management Controller */ +#define DBG_DRVSTORAGE 1 /* Storage layers */ +#define DBG_DRVNETWORK 2 /* Network layers */ +#define DBG_DRVKEYBOARD 3 /* Keyboard */ +#define DBG_DRVHID 4 /* HID Devices */ +#define DBG_DRVAUDIO 5 /* Audio */ +#define DBG_DRVSERIAL 7 /* Serial */ +#define DBG_DRVSAM 8 /* SCSI Architecture Model layers */ +#define DBG_DRVPARALLELATA 9 /* Parallel ATA */ +#define DBG_DRVPARALLELSCSI 10 /* Parallel SCSI */ +#define DBG_DRVSATA 11 /* Serial ATA */ +#define DBG_DRVSAS 12 /* SAS */ 
+#define DBG_DRVFIBRECHANNEL 13 /* FiberChannel */ +#define DBG_DRVUSB 14 /* USB */ +#define DBG_DRVBLUETOOTH 15 /* Bluetooth */ +#define DBG_DRVFIREWIRE 16 /* FireWire */ +#define DBG_DRVINFINIBAND 17 /* Infiniband */ +#define DBG_DRVGRAPHICS 18 /* Graphics */ +#define DBG_DRVSD 19 /* Secure Digital */ +#define DBG_DRVNAND 20 /* NAND drivers and layers */ +#define DBG_SSD 21 /* SSD */ +#define DBG_DRVSPI 22 /* SPI */ +#define DBG_DRVWLAN_802_11 23 /* WLAN 802.11 */ +#define DBG_DRVSSM 24 /* System State Manager(AppleSSM) */ +#define DBG_DRVSMC 25 /* System Management Controller */ +#define DBG_DRVMACEFIMANAGER 26 /* Mac EFI Manager */ /* Backwards compatibility */ #define DBG_DRVPOINTING DBG_DRVHID /* OBSOLETE: Use DBG_DRVHID instead */ @@ -842,6 +843,7 @@ extern void kdebug_reset(void); #define DBG_APP_DFR 0x0E #define DBG_APP_SAMBA 0x80 #define DBG_APP_EOSSUPPORT 0x81 +#define DBG_APP_MACEFIMANAGER 0x82 /* Kernel Debug codes for Throttling (DBG_THROTTLE) */ #define OPEN_THROTTLE_WINDOW 0x1 diff --git a/bsd/sys/msgbuf.h b/bsd/sys/msgbuf.h index 5b8211cac..2bceb084e 100644 --- a/bsd/sys/msgbuf.h +++ b/bsd/sys/msgbuf.h @@ -78,8 +78,9 @@ struct msgbuf { #ifdef XNU_KERNEL_PRIVATE __BEGIN_DECLS extern struct msgbuf *msgbufp; +extern struct msgbuf *aslbufp; extern void log_putc(char); -extern void log_putc_locked(char); +extern void log_putc_locked(struct msgbuf *, char); extern int log_setsize(int size); extern int log_dmesg(user_addr_t, uint32_t, int32_t *); __END_DECLS diff --git a/bsd/sys/syslog.h b/bsd/sys/syslog.h index 42ff28cc6..af3705e2b 100644 --- a/bsd/sys/syslog.h +++ b/bsd/sys/syslog.h @@ -326,7 +326,6 @@ struct reg_desc { __BEGIN_DECLS void log(int, const char *, ...); #ifdef XNU_KERNEL_PRIVATE -void logpri(int); int vaddlog(const char *, va_list) __printflike(1,0); void logtime(time_t); #endif /* XNU_KERNEL_PRIVATE */ diff --git a/bsd/vfs/vfs_fslog.c b/bsd/vfs/vfs_fslog.c index b13faa434..6dbd62b93 100644 --- a/bsd/vfs/vfs_fslog.c +++ 
b/bsd/vfs/vfs_fslog.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006-2014 Apple Inc. All rights reserved. + * Copyright (c) 2006-2017 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -40,7 +40,8 @@ #include #include -#include +#include +#include #include @@ -105,3 +106,170 @@ fslog_extmod_msgtracer(proc_t caller, proc_t target) } } +#if defined(__x86_64__) + +/* + * Log information about floating point exception handling + */ + +static lck_mtx_t fpxlock; + +void +fpxlog_init(void) +{ + lck_grp_attr_t *lck_grp_attr = lck_grp_attr_alloc_init(); + lck_grp_t *lck_grp = lck_grp_alloc_init("fpx", lck_grp_attr); + lck_mtx_init(&fpxlock, lck_grp, LCK_ATTR_NULL); +} + +struct fpx_event { + uuid_t fe_uuid; + uint32_t fe_code; + uint32_t fe_xcpt; + TAILQ_ENTRY(fpx_event) fe_link; +}; + +static bool +match_fpx_event(const struct fpx_event *fe, + const uuid_t uuid, const uint32_t code, const uint32_t xcpt) +{ + return (code == fe->fe_code && xcpt == fe->fe_xcpt && + 0 == memcmp(uuid, fe->fe_uuid, sizeof (uuid_t))); +} + +#if FPX_EVENT_DBG +static __attribute__((noinline)) void +print_fpx_event(const char *pfx, const struct fpx_event *fe) +{ + uuid_string_t uustr; + uuid_unparse_upper(fe->fe_uuid, uustr); + printf("%s: code 0x%x xcpt 0x%x uuid '%s'\n", + pfx, fe->fe_code, fe->fe_xcpt, uustr); +} +#define DPRINTF_FPX_EVENT(pfx, fe) print_fpx_event(pfx, fe) +#else +#define DPRINTF_FPX_EVENT(pfx, fe) /* nothing */ +#endif + +#define MAX_DISTINCT_FPX_EVENTS 101 /* (approx one page of heap) */ + +/* + * Filter to detect "new" tuples. + * Uses limited amount of state, managed LRU. + * Optimized to ignore repeated invocation with the same tuple. + * + * Note that there are 6 exception types, two types of FP, and + * many binaries, so don't make the list bound too small. + * It's also a linear search, so don't make it too large either. + * Next level filtering provided by syslogd, and summarization. 
+ */ +static bool +novel_fpx_event(const uuid_t uuid, uint32_t code, uint32_t xcpt) +{ + static TAILQ_HEAD(fpx_event_head, fpx_event) fehead = + TAILQ_HEAD_INITIALIZER(fehead); + struct fpx_event *fe; + + lck_mtx_lock(&fpxlock); + + fe = TAILQ_FIRST(&fehead); + if (NULL != fe && + match_fpx_event(fe, uuid, code, xcpt)) { + /* seen before and element already at head */ + lck_mtx_unlock(&fpxlock); + DPRINTF_FPX_EVENT("seen, head", fe); + return (false); + } + + unsigned int count = 0; + + TAILQ_FOREACH(fe, &fehead, fe_link) { + if (match_fpx_event(fe, uuid, code, xcpt)) { + /* seen before, now move element to head */ + TAILQ_REMOVE(&fehead, fe, fe_link); + TAILQ_INSERT_HEAD(&fehead, fe, fe_link); + lck_mtx_unlock(&fpxlock); + DPRINTF_FPX_EVENT("seen, moved to head", fe); + return (false); + } + count++; + } + + /* not recorded here => novel */ + + if (count >= MAX_DISTINCT_FPX_EVENTS) { + /* reuse LRU element */ + fe = TAILQ_LAST(&fehead, fpx_event_head); + TAILQ_REMOVE(&fehead, fe, fe_link); + DPRINTF_FPX_EVENT("reusing", fe); + } else { + /* add a new element to the list */ + fe = kalloc(sizeof (*fe)); + } + memcpy(fe->fe_uuid, uuid, sizeof (uuid_t)); + fe->fe_code = code; + fe->fe_xcpt = xcpt; + TAILQ_INSERT_HEAD(&fehead, fe, fe_link); + lck_mtx_unlock(&fpxlock); + + DPRINTF_FPX_EVENT("novel", fe); + + return (true); +} + +void +fpxlog( + int code, /* Mach exception code: e.g. 5 or 8 */ + uint32_t stat, /* Full FP status register bits */ + uint32_t ctrl, /* Full FP control register bits */ + uint32_t xcpt) /* Exception bits from FP status */ +{ + proc_t p = current_proc(); + if (PROC_NULL == p) + return; + + uuid_t uuid; + proc_getexecutableuuid(p, uuid, sizeof (uuid)); + + /* + * Check to see if an exception with this + * has been seen before. If "novel" then log a message. 
+ */ + if (!novel_fpx_event(uuid, code, xcpt)) + return; + + const size_t nmlen = 2 * MAXCOMLEN + 1; + char nm[nmlen] = {}; + proc_selfname(nm, nmlen); + if (escape_str(nm, strlen(nm) + 1, nmlen)) + snprintf(nm, nmlen, "(a.out)"); + + const size_t slen = 8 + 1 + 8 + 1; + char xcptstr[slen], csrstr[slen]; + + snprintf(xcptstr, slen, "%x.%x", code, xcpt); + if (ctrl == stat) + snprintf(csrstr, slen, "%x", ctrl); + else + snprintf(csrstr, slen, "%x.%x", ctrl, stat); + +#if DEVELOPMENT || DEBUG + printf("%s[%d]: com.apple.kernel.fpx: %s, %s\n", + nm, proc_pid(p), xcptstr, csrstr); +#endif + kern_asl_msg(LOG_DEBUG, "messagetracer", 5, + /* 0 */ "com.apple.message.domain", "com.apple.kernel.fpx", + /* 1 */ "com.apple.message.signature", nm, + /* 2 */ "com.apple.message.signature2", xcptstr, + /* 3 */ "com.apple.message.value", csrstr, + /* 4 */ "com.apple.message.summarize", "YES", + NULL); +} + +#else + +void +fpxlog_init(void) +{} + +#endif /* __x86_64__ */ diff --git a/config/MasterVersion b/config/MasterVersion index d697dff22..461cd787f 100644 --- a/config/MasterVersion +++ b/config/MasterVersion @@ -1,4 +1,4 @@ -17.0.0 +17.2.0 # The first line of this file contains the master version number for the kernel. # All other instances of the kernel version in xnu are derived from this file. 
diff --git a/iokit/IOKit/IOMemoryDescriptor.h b/iokit/IOKit/IOMemoryDescriptor.h index 0572ef51d..35c330581 100644 --- a/iokit/IOKit/IOMemoryDescriptor.h +++ b/iokit/IOKit/IOMemoryDescriptor.h @@ -234,8 +234,14 @@ struct IOMDDMAWalkSegmentArgs { UInt64 fOffset; // Input/Output offset UInt64 fIOVMAddr, fLength; // Output variables UInt8 fMapped; // Input Variable, Require mapped IOVMA + UInt64 fMappedBase; // Input base of mapping }; typedef UInt8 IOMDDMAWalkSegmentState[128]; +// fMapped: +enum +{ + kIOMDDMAWalkMappedLocal = 2 +}; #endif /* KERNEL_PRIVATE */ diff --git a/iokit/IOKit/IOPlatformExpert.h b/iokit/IOKit/IOPlatformExpert.h index 936809c7c..f66f8da1f 100644 --- a/iokit/IOKit/IOPlatformExpert.h +++ b/iokit/IOKit/IOPlatformExpert.h @@ -60,7 +60,8 @@ enum { kPEPanicSync, kPEPagingOff, kPEPanicBegin, - kPEPanicEnd + kPEPanicEnd, + kPEPanicDiskShutdown }; extern int (*PE_halt_restart)(unsigned int type); extern int PEHaltRestart(unsigned int type); diff --git a/iokit/IOKit/pwr_mgt/RootDomain.h b/iokit/IOKit/pwr_mgt/RootDomain.h index 1e447d199..5a649dfdf 100644 --- a/iokit/IOKit/pwr_mgt/RootDomain.h +++ b/iokit/IOKit/pwr_mgt/RootDomain.h @@ -851,6 +851,7 @@ class IOPMrootDomain: public IOService void evaluateAssertions(IOPMDriverAssertionType newAssertions, IOPMDriverAssertionType oldAssertions); + void evaluateWranglerAssertions(); void deregisterPMSettingObject( PMSettingObject * pmso ); diff --git a/iokit/Kernel/IODMACommand.cpp b/iokit/Kernel/IODMACommand.cpp index 5feadeb14..261f86b1b 100644 --- a/iokit/Kernel/IODMACommand.cpp +++ b/iokit/Kernel/IODMACommand.cpp @@ -1138,8 +1138,16 @@ IODMACommand::genIOVMSegments(uint32_t op, internalState->fIOVMAddrValid = state->fIOVMAddr = 0; internalState->fNextRemapPage = NULL; internalState->fNewMD = false; - state->fMapped = (0 != fMapper); mdOp = kIOMDFirstSegment; + if (fMapper) + { + if (internalState->fLocalMapperAllocValid) + { + state->fMapped = kIOMDDMAWalkMappedLocal; + state->fMappedBase = 
internalState->fLocalMapperAlloc; + } + else state->fMapped = true; + } }; UInt32 segIndex = 0; diff --git a/iokit/Kernel/IOLib.cpp b/iokit/Kernel/IOLib.cpp index c4a63b9dd..2fba9bc5d 100644 --- a/iokit/Kernel/IOLib.cpp +++ b/iokit/Kernel/IOLib.cpp @@ -99,7 +99,6 @@ __doprnt( extern void cons_putc_locked(char); extern void bsd_log_lock(void); extern void bsd_log_unlock(void); -extern void logwakeup(); /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ diff --git a/iokit/Kernel/IOMemoryDescriptor.cpp b/iokit/Kernel/IOMemoryDescriptor.cpp index 3b59323b8..ce2963317 100644 --- a/iokit/Kernel/IOMemoryDescriptor.cpp +++ b/iokit/Kernel/IOMemoryDescriptor.cpp @@ -2193,7 +2193,8 @@ IOReturn IOGeneralMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void * isP = (InternalState *) vData; UInt offset = isP->fIO.fOffset; - bool mapped = isP->fIO.fMapped; + uint8_t mapped = isP->fIO.fMapped; + uint64_t mappedBase; if (mapped && (kIOMemoryRemote & _flags)) return (kIOReturnNotAttached); @@ -2218,6 +2219,20 @@ IOReturn IOGeneralMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void * } } + if (kIOMDDMAWalkMappedLocal == mapped) mappedBase = isP->fIO.fMappedBase; + else if (mapped) + { + if (IOMapper::gSystem + && (!(kIOMemoryHostOnly & _flags)) + && _memoryEntries + && (dataP = getDataP(_memoryEntries)) + && dataP->fMappedBaseValid) + { + mappedBase = dataP->fMappedBase; + } + else mapped = 0; + } + if (offset >= _length) return (offset == _length)? 
kIOReturnOverrun : kIOReturnInternalError; @@ -2233,7 +2248,6 @@ IOReturn IOGeneralMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void * UInt length; UInt64 address; - if ( (_flags & kIOMemoryTypeMask) == kIOMemoryTypePhysical) { // Physical address based memory descriptor @@ -2250,10 +2264,9 @@ IOReturn IOGeneralMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void * length = off2Ind - offset; address = physP[ind - 1].address + len - length; - if (true && mapped && _memoryEntries - && (dataP = getDataP(_memoryEntries)) && dataP->fMappedBaseValid) + if (true && mapped) { - address = dataP->fMappedBase + offset; + address = mappedBase + offset; } else { @@ -2287,10 +2300,9 @@ IOReturn IOGeneralMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void * length = off2Ind - offset; address = physP[ind - 1].address + len - length; - if (true && mapped && _memoryEntries - && (dataP = getDataP(_memoryEntries)) && dataP->fMappedBaseValid) + if (true && mapped) { - address = dataP->fMappedBase + offset; + address = mappedBase + offset; } else { @@ -2339,9 +2351,9 @@ IOReturn IOGeneralMemoryDescriptor::dmaCommandOperation(DMACommandOps op, void * // If a mapped address is requested and this is a pre-mapped IOPL // then just need to compute an offset relative to the mapped base. 
- if (mapped && dataP->fMappedBaseValid) { + if (mapped) { offset += (ioplInfo.fPageOffset & PAGE_MASK); - address = trunc_page_64(dataP->fMappedBase) + ptoa_64(ioplInfo.fMappedPage) + offset; + address = trunc_page_64(mappedBase) + ptoa_64(ioplInfo.fMappedPage) + offset; continue; // Done leave do/while(false) now } @@ -3084,14 +3096,15 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) for (UInt range = 0; range < _rangesCount; range++) { ioPLBlock iopl; - mach_vm_address_t startPage; + mach_vm_address_t startPage, startPageOffset; mach_vm_size_t numBytes; ppnum_t highPage = 0; // Get the startPage address and length of vec[range] getAddrLenForInd(startPage, numBytes, type, vec, range); - iopl.fPageOffset = startPage & PAGE_MASK; - numBytes += iopl.fPageOffset; + startPageOffset = startPage & PAGE_MASK; + iopl.fPageOffset = startPageOffset; + numBytes += startPageOffset; startPage = trunc_page_64(startPage); if (mapper) @@ -3186,7 +3199,7 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) iopl.fIOMDOffset = mdOffset; iopl.fPageInfo = pageIndex; - if (mapper && pageIndex && (page_mask & (mdOffset + iopl.fPageOffset))) dataP->fDiscontig = true; + if (mapper && pageIndex && (page_mask & (mdOffset + startPageOffset))) dataP->fDiscontig = true; if (!_memoryEntries->appendBytes(&iopl, sizeof(iopl))) { // Clean up partial created and unsaved iopl diff --git a/iokit/Kernel/IOPMrootDomain.cpp b/iokit/Kernel/IOPMrootDomain.cpp index a28aa990f..3b778df40 100644 --- a/iokit/Kernel/IOPMrootDomain.cpp +++ b/iokit/Kernel/IOPMrootDomain.cpp @@ -2347,6 +2347,21 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) ((IOService *)this)->stop_watchdog_timer(); //14456299 lowBatteryCondition = false; +#if DEVELOPMENT || DEBUG + extern int g_should_log_clock_adjustments; + if (g_should_log_clock_adjustments) { + clock_sec_t secs = 0; + clock_usec_t microsecs = 0; + uint64_t now_b = mach_absolute_time(); + + 
PEGetUTCTimeOfDay(&secs, &microsecs); + + uint64_t now_a = mach_absolute_time(); + os_log(OS_LOG_DEFAULT, "%s PMU before going to sleep %lu s %d u %llu abs_b_PEG %llu abs_a_PEG \n", + __func__, (unsigned long)secs, microsecs, now_b, now_a); + } +#endif + getPlatform()->sleepKernel(); // The CPU(s) are off at this point, @@ -6109,7 +6124,9 @@ bool IOPMrootDomain::displayWranglerMatchPublished( IONotifier * notifier __unused) { #if !NO_KERNEL_HID - // found the display wrangler, now install a handler + // found the display wrangler, check for any display assertions already created + gRootDomain->evaluateWranglerAssertions(); + // install a handler if( !newService->registerInterest( gIOGeneralInterest, &displayWranglerNotification, target, 0) ) { @@ -7555,6 +7572,22 @@ void IOPMrootDomain::evaluateAssertions(IOPMDriverAssertionType newAssertions, I } } +void IOPMrootDomain::evaluateWranglerAssertions() +{ + if (gIOPMWorkLoop->inGate() == false) { + gIOPMWorkLoop->runAction( + OSMemberFunctionCast(IOWorkLoop::Action, this, &IOPMrootDomain::evaluateWranglerAssertions), + (OSObject *)this); + + return; + } + + if (pmAssertions->getActivatedAssertions() & kIOPMDriverAssertionPreventDisplaySleepBit) { + DLOG("wrangler setIgnoreIdleTimer\(1) on matching\n"); + wrangler->setIgnoreIdleTimer( true ); + } +} + // MARK: - // MARK: Statistics diff --git a/iokit/Kernel/IOPlatformExpert.cpp b/iokit/Kernel/IOPlatformExpert.cpp index 008afdb78..274e69043 100644 --- a/iokit/Kernel/IOPlatformExpert.cpp +++ b/iokit/Kernel/IOPlatformExpert.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2014 Apple Inc. All rights reserved. + * Copyright (c) 1998-2017 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -54,7 +54,8 @@ extern "C" { #include } -#if defined(__x86_64__) +#if !CONFIG_EMBEDDED + /* * This will eventually be properly exported in * ER: Expose coprocessor version (T208/T290) in a kernel/kext header @@ -62,7 +63,9 @@ extern "C" { * header this ends up in.
*/ #define kCoprocessorMinVersion 0x00020000 -#endif + +boolean_t coprocessor_cross_panic_enabled = TRUE; +#endif /* !CONFIG_EMBEDDED */ void printDictionaryKeys (OSDictionary * inDictionary, char * inMsg); static void getCStringForObject(OSObject *inObj, char *outStr, size_t outStrLen); @@ -111,7 +114,7 @@ bool IOPlatformExpert::start( IOService * provider ) OSData * busFrequency; uint32_t debugFlags; -#if defined(__x86_64__) +#if !CONFIG_EMBEDDED IORegistryEntry *platform_entry = NULL; OSData *coprocessor_version_obj = NULL; uint64_t coprocessor_version = 0; @@ -167,7 +170,7 @@ bool IOPlatformExpert::start( IOService * provider ) } } -#if defined(__x86_64__) +#if !CONFIG_EMBEDDED platform_entry = IORegistryEntry::fromPath(kIODeviceTreePlane ":/efi/platform"); if (platform_entry != NULL) { coprocessor_version_obj = OSDynamicCast(OSData, platform_entry->getProperty("apple-coprocessor-version")); @@ -175,11 +178,12 @@ bool IOPlatformExpert::start( IOService * provider ) memcpy(&coprocessor_version, coprocessor_version_obj->getBytesNoCopy(), coprocessor_version_obj->getLength()); if (coprocessor_version >= kCoprocessorMinVersion) { coprocessor_paniclog_flush = TRUE; + extended_debug_log_init(); } } platform_entry->release(); } -#endif /* defined(__x86_64__) */ +#endif /* !CONFIG_EMBEDDED */ return( configure(provider) ); } @@ -877,6 +881,11 @@ int PEHaltRestart(unsigned int type) // Notify any listeners that we're done collecting // panic data before we call through to do the restart IOCPURunPlatformPanicActions(kPEPanicEnd); + + // Callout to shutdown the disk driver once we've returned from the + // kPEPanicEnd callback (and we know all core dumps on this system + // are complete). 
+ IOCPURunPlatformPanicActions(kPEPanicDiskShutdown); } // Do an initial sync to flush as much panic data as possible, @@ -888,12 +897,21 @@ int PEHaltRestart(unsigned int type) PE_sync_panic_buffers(); } else if (type == kPEPanicEnd) { - IOCPURunPlatformPanicActions(type); +#if !CONFIG_EMBEDDED + if (coprocessor_cross_panic_enabled) +#endif + IOCPURunPlatformPanicActions(type); + } else if (type == kPEPanicBegin) { - // Only call the kPEPanicBegin callout once - if (!panic_begin_called) { - panic_begin_called = TRUE; - IOCPURunPlatformPanicActions(type); +#if !CONFIG_EMBEDDED + if (coprocessor_cross_panic_enabled) +#endif + { + // Only call the kPEPanicBegin callout once + if (!panic_begin_called) { + panic_begin_called = TRUE; + IOCPURunPlatformPanicActions(type); + } } } @@ -1156,6 +1174,27 @@ void IOPlatformExpert::registerNVRAMController(IONVRAMController * caller) entry->release( ); } #else /* !CONFIG_EMBEDDED */ + /* + * If we have panic debugging enabled and a prod-fused coprocessor, + * disable cross panics so that the co-processor doesn't cause the system + * to reset when we enter the debugger or hit a panic on the x86 side. 
+ */ + if ( panicDebugging ) + { + entry = IORegistryEntry::fromPath( "/options", gIODTPlane ); + if ( entry ) + { + data = OSDynamicCast( OSData, entry->getProperty( "EffectiveProductionStatus" ) ); + if ( data && ( data->getLength( ) == sizeof( UInt8 ) ) ) { + UInt8 *isProdFused = (UInt8 *) data->getBytesNoCopy( ); + if ( *isProdFused ) { + coprocessor_cross_panic_enabled = FALSE; + } + } + entry->release( ); + } + } + entry = IORegistryEntry::fromPath( "/efi/platform", gIODTPlane ); if ( entry ) { diff --git a/iokit/Kernel/IOUserClient.cpp b/iokit/Kernel/IOUserClient.cpp index b16a516bb..6a3fc814b 100644 --- a/iokit/Kernel/IOUserClient.cpp +++ b/iokit/Kernel/IOUserClient.cpp @@ -1006,9 +1006,9 @@ IOReturn IOServiceMessageUserNotification::handler( void * ref, } else { + if( callerArgSize > kIOUserNotifyMaxMessageSize) + callerArgSize = kIOUserNotifyMaxMessageSize; argSize = callerArgSize; - if( argSize > kIOUserNotifyMaxMessageSize) - argSize = kIOUserNotifyMaxMessageSize; } // adjust message size for ipc restrictions @@ -2320,6 +2320,8 @@ static kern_return_t internal_io_service_add_notification( do { err = kIOReturnNoResources; + if (matching_size > (sizeof(io_struct_inband_t) * 1024)) return(kIOReturnMessageTooLarge); + if( !(sym = OSSymbol::withCString( notification_type ))) err = kIOReturnNoResources; diff --git a/iokit/Tests/TestIOMemoryDescriptor.cpp b/iokit/Tests/TestIOMemoryDescriptor.cpp index 11780e5f2..bc939f8ed 100644 --- a/iokit/Tests/TestIOMemoryDescriptor.cpp +++ b/iokit/Tests/TestIOMemoryDescriptor.cpp @@ -192,6 +192,87 @@ IODMACommandForceDoubleBufferTest(int newValue) return (0); } +// +static int __unused +IODMACommandLocalMappedNonContig(int newValue) +{ + IOReturn kr; + IOMemoryDescriptor * md; + IODMACommand * dma; + OSDictionary * matching; + IOService * device; + IOMapper * mapper; + IODMACommand::SegmentOptions segOptions = + { + .fStructSize = sizeof(segOptions), + .fNumAddressBits = 64, + .fMaxSegmentSize = 128*1024, + 
.fMaxTransferSize = 128*1024, + .fAlignment = 1, + .fAlignmentLength = 1, + .fAlignmentInternalSegments = 1 + }; + IODMACommand::Segment64 segments[1]; + UInt32 numSegments; + UInt64 dmaOffset; + UInt64 segPhys; + vm_address_t buffer; + vm_size_t bufSize = ptoa(4); + + if (!IOMapper::gSystem) return (0); + + buffer = 0; + kr = vm_allocate_kernel(kernel_map, &buffer, bufSize, VM_FLAGS_ANYWHERE, VM_KERN_MEMORY_IOKIT); + assert(KERN_SUCCESS == kr); + + // fragment the vmentries + kr = vm_inherit(kernel_map, buffer + ptoa(1), ptoa(1), VM_INHERIT_NONE); + assert(KERN_SUCCESS == kr); + + md = IOMemoryDescriptor::withAddressRange( + buffer + 0xa00, 0x2000, kIODirectionOutIn, kernel_task); + assert(md); + kr = md->prepare(kIODirectionOutIn); + assert(kIOReturnSuccess == kr); + + segPhys = md->getPhysicalSegment(0, NULL, 0); + + matching = IOService::nameMatching("XHC1"); + assert(matching); + device = IOService::copyMatchingService(matching); + matching->release(); + mapper = device ? IOMapper::copyMapperForDeviceWithIndex(device, 0) : NULL; + + dma = IODMACommand::withSpecification(kIODMACommandOutputHost64, &segOptions, + kIODMAMapOptionMapped, + mapper, NULL); + assert(dma); + kr = dma->setMemoryDescriptor(md, true); + assert(kIOReturnSuccess == kr); + + dmaOffset = 0; + numSegments = 1; + kr = dma->gen64IOVMSegments(&dmaOffset, &segments[0], &numSegments); + assert(kIOReturnSuccess == kr); + assert(1 == numSegments); + + if (mapper) assertf(segments[0].fIOVMAddr != segPhys, "phys !local 0x%qx, 0x%qx, %p", segments[0].fIOVMAddr, segPhys, dma); + + kr = dma->clearMemoryDescriptor(true); + assert(kIOReturnSuccess == kr); + dma->release(); + + kr = md->complete(kIODirectionOutIn); + assert(kIOReturnSuccess == kr); + md->release(); + + kr = vm_deallocate(kernel_map, buffer, bufSize); + assert(KERN_SUCCESS == kr); + OSSafeReleaseNULL(mapper); + + return (0); +} + // static int IOMemoryRemoteTest(int newValue) @@ -688,6 +769,9 @@ int IOMemoryDescriptorTest(int newValue) } 
#endif +// result = IODMACommandLocalMappedNonContig(newValue); +// if (result) return (result); + result = IODMACommandForceDoubleBufferTest(newValue); if (result) return (result); diff --git a/libkern/c++/OSKext.cpp b/libkern/c++/OSKext.cpp index 797fd38a2..feb99abad 100644 --- a/libkern/c++/OSKext.cpp +++ b/libkern/c++/OSKext.cpp @@ -266,6 +266,7 @@ static IORecursiveLock * sKextLock = NULL; static OSDictionary * sKextsByID = NULL; static OSDictionary * sExcludeListByID = NULL; +static OSKextVersion sExcludeListVersion = 0; static OSArray * sLoadedKexts = NULL; static OSArray * sUnloadedPrelinkedKexts = NULL; @@ -3757,26 +3758,11 @@ OSKext::createExcludeListFromBooterData( if ( myBundleID && strcmp( myBundleID->getCStringNoCopy(), "com.apple.driver.KextExcludeList" ) == 0 ) { - /* get copy of exclusion list dictionary */ - OSDictionary * myTempDict; // do not free - - myTempDict = OSDynamicCast( - OSDictionary, - theInfoDict->getObject("OSKextExcludeList")); - if ( NULL == myTempDict ) { + boolean_t updated = updateExcludeList(theInfoDict); + if (!updated) { /* 25322874 */ panic("Missing OSKextExcludeList dictionary\n"); } - - IORecursiveLockLock(sKextLock); - - /* get rid of old exclusion list */ - if (sExcludeListByID) { - OSSafeReleaseNULL(sExcludeListByID); - } - sExcludeListByID = OSDictionary::withDictionary(myTempDict, 0); - IORecursiveLockUnlock(sKextLock); - break; } @@ -3809,30 +3795,60 @@ OSKext::createExcludeListFromPrelinkInfo( OSArray * theInfoArray ) myInfoDict->getObject(kCFBundleIdentifierKey)); if ( myBundleID && strcmp( myBundleID->getCStringNoCopy(), "com.apple.driver.KextExcludeList" ) == 0 ) { - // get copy of exclude list dictionary - OSDictionary * myTempDict; // do not free - myTempDict = OSDynamicCast(OSDictionary, - myInfoDict->getObject("OSKextExcludeList")); - if ( NULL == myTempDict ) { + + boolean_t updated = updateExcludeList(myInfoDict); + if (!updated) { /* 25322874 */ panic("Missing OSKextExcludeList dictionary\n"); } - - 
IORecursiveLockLock(sKextLock); - // get rid of old exclude list - if (sExcludeListByID) { - OSSafeReleaseNULL(sExcludeListByID); - } - - sExcludeListByID = OSDictionary::withDictionary(myTempDict, 0); - IORecursiveLockUnlock(sKextLock); break; } } // for (i = 0; i < theInfoArray->getCount()... - + return; } +/* static */ +boolean_t +OSKext::updateExcludeList(OSDictionary *infoDict) +{ + OSDictionary *myTempDict = NULL; // do not free + OSString *myTempString = NULL; // do not free + OSKextVersion newVersion = 0; + boolean_t updated = false; + + if (!infoDict) { + return false; + } + + myTempDict = OSDynamicCast(OSDictionary, infoDict->getObject("OSKextExcludeList")); + if (!myTempDict) { + return false; + } + + myTempString = OSDynamicCast(OSString, infoDict->getObject(kCFBundleVersionKey)); + if (!myTempString) { + return false; + } + + newVersion = OSKextParseVersionString(myTempString->getCStringNoCopy()); + if (newVersion == 0) { + return false; + } + + IORecursiveLockLock(sKextLock); + + if (newVersion > sExcludeListVersion) { + OSSafeReleaseNULL(sExcludeListByID); + sExcludeListByID = OSDictionary::withDictionary(myTempDict, 0); + sExcludeListVersion = newVersion; + updated = true; + } + + IORecursiveLockUnlock(sKextLock); + return updated; +} + #if PRAGMA_MARK #pragma mark Accessors #endif @@ -4285,16 +4301,26 @@ OSKext::isInExcludeList(void) size_t i; boolean_t wantLessThan = false; boolean_t wantLessThanEqualTo = false; + boolean_t isInExcludeList = true; char myBuffer[32]; + IORecursiveLockLock(sKextLock); + if (!sExcludeListByID) { - return(false); + isInExcludeList = false; + } else { + /* look up by bundleID in our exclude list and if found get version + * string (or strings) that we will not allow to load + */ + versionString = OSDynamicCast(OSString, sExcludeListByID->getObject(bundleID)); + if (versionString == NULL || versionString->getLength() > (sizeof(myBuffer) - 1)) { + isInExcludeList = false; + } } - /* look up by bundleID in our exclude 
list and if found get version - * string (or strings) that we will not allow to load - */ - versionString = OSDynamicCast(OSString, sExcludeListByID->getObject(bundleID)); - if (versionString == NULL || versionString->getLength() > (sizeof(myBuffer) - 1)) { + + IORecursiveLockUnlock(sKextLock); + + if (!isInExcludeList) { return(false); } @@ -4652,6 +4678,18 @@ OSKext::load( * personalities within the load function. */ if (!declaresExecutable()) { + /* There is a special case where a non-executable kext can be loaded: the + * AppleKextExcludeList. Detect that special kext by bundle identifier and + * load its metadata into the global data structures, if appropriate + */ + if (strcmp(getIdentifierCString(), "com.apple.driver.KextExcludeList") == 0) { + boolean_t updated = updateExcludeList(infoDict); + if (updated) { + OSKextLog(this, + kOSKextLogDebugLevel | kOSKextLogLoadFlag, + "KextExcludeList was updated to version: %lld", sExcludeListVersion); + } + } result = kOSReturnSuccess; goto loaded; } @@ -11883,4 +11921,3 @@ int OSKextGetUUIDForName(const char *name, uuid_t uuid) return 1; } #endif - diff --git a/libkern/libkern/c++/OSKext.h b/libkern/libkern/c++/OSKext.h index 2fc70eb1c..581c07b54 100644 --- a/libkern/libkern/c++/OSKext.h +++ b/libkern/libkern/c++/OSKext.h @@ -630,6 +630,7 @@ class OSKext : public OSObject OSDictionary * theDictionary, OSCollectionIterator * theIterator); static void createExcludeListFromPrelinkInfo(OSArray * theInfoArray); + static boolean_t updateExcludeList(OSDictionary * infoDict); static bool isWaitingKextd(void); diff --git a/libkern/os/log.c b/libkern/os/log.c index a019b7bd9..143269862 100644 --- a/libkern/os/log.c +++ b/libkern/os/log.c @@ -45,7 +45,7 @@ extern firehose_chunk_t firehose_boot_chunk; extern void bsd_log_lock(void); extern void bsd_log_unlock(void); -extern void logwakeup(void); +extern void logwakeup(struct msgbuf *); decl_lck_spin_data(extern, oslog_stream_lock) extern void oslog_streamwakeup(void); @@ -196,7 
+196,13 @@ _os_log_to_msgbuf_internal(const char *format, va_list args, bool safe, bool log static int msgbufreplay = -1; va_list args_copy; +#if DEVELOPMENT || DEBUG + if (safe) { + bsd_log_lock(); + } +#else bsd_log_lock(); +#endif if (!safe) { if (-1 == msgbufreplay) msgbufreplay = msgbufp->msg_bufx; @@ -253,9 +259,15 @@ _os_log_to_msgbuf_internal(const char *format, va_list args, bool safe, bool log vprintf_log_locked(format, args_copy); va_end(args_copy); +#if DEVELOPMENT || DEBUG + if (safe) { + bsd_log_unlock(); + logwakeup(msgbufp); + } +#else bsd_log_unlock(); - - if (safe) logwakeup(); + if (safe) logwakeup(msgbufp); +#endif } static void diff --git a/osfmk/arm/arm_init.c b/osfmk/arm/arm_init.c index a9bb6d407..e81af968b 100644 --- a/osfmk/arm/arm_init.c +++ b/osfmk/arm/arm_init.c @@ -92,10 +92,6 @@ extern const char version[]; extern const char version_variant[]; extern int disableConsoleOutput; -#if __ARM_PAN_AVAILABLE__ -SECURITY_READ_ONLY_LATE(boolean_t) arm_pan_enabled = FALSE; /* PAN support on Hurricane and newer HW */ -#endif - int pc_trace_buf[PC_TRACE_BUF_SIZE] = {0}; int pc_trace_cnt = PC_TRACE_BUF_SIZE; int debug_task; @@ -304,18 +300,7 @@ arm_init( PE_parse_boot_argn("immediate_NMI", &force_immediate_debug_halt, sizeof(force_immediate_debug_halt)); #if __ARM_PAN_AVAILABLE__ -#if (DEVELOPMENT || DEBUG) - boolean_t pan; - if (!PE_parse_boot_argn("-pmap_smap_disable", &pan, sizeof(pan))) { - arm_pan_enabled = TRUE; - __builtin_arm_wsr("pan", 1); - set_mmu_control((get_mmu_control()) & ~SCTLR_PAN_UNCHANGED); - } -#else - arm_pan_enabled = TRUE; __builtin_arm_wsr("pan", 1); - /* SCTLR_EL1.SPAN is clear on RELEASE */ -#endif #endif /* __ARM_PAN_AVAILABLE__ */ arm_vm_init(xmaxmem, args); @@ -411,15 +396,7 @@ arm_init_cpu( cpu_data_t *cpu_data_ptr) { #if __ARM_PAN_AVAILABLE__ -#if (DEVELOPMENT || DEBUG) - if (arm_pan_enabled) { - __builtin_arm_wsr("pan", 1); - set_mmu_control((get_mmu_control()) & ~SCTLR_PAN_UNCHANGED); - } -#else 
__builtin_arm_wsr("pan", 1); - /* SCTLR_EL1.SPAN is clear on RELEASE */ -#endif #endif cpu_data_ptr->cpu_flags &= ~SleepState; @@ -495,15 +472,7 @@ arm_init_idle_cpu( cpu_data_t *cpu_data_ptr) { #if __ARM_PAN_AVAILABLE__ -#if (DEVELOPMENT || DEBUG) - if (arm_pan_enabled) { - __builtin_arm_wsr("pan", 1); - set_mmu_control((get_mmu_control()) & ~SCTLR_PAN_UNCHANGED); - } -#else __builtin_arm_wsr("pan", 1); - /* SCTLR_EL1.SPAN is clear on RELEASE */ -#endif #endif #if __ARM_SMP__ && defined(ARMA7) cpu_data_ptr->cpu_CLW_active = 1; diff --git a/osfmk/arm/machine_routines.c b/osfmk/arm/machine_routines.c index b86bd643b..bc3e4f8eb 100644 --- a/osfmk/arm/machine_routines.c +++ b/osfmk/arm/machine_routines.c @@ -51,7 +51,6 @@ #include #include -#include #if KPC #include @@ -70,7 +69,6 @@ boolean_t is_clock_configured = FALSE; extern int mach_assert; extern volatile uint32_t debug_enabled; -SECURITY_READ_ONLY_LATE(unsigned int) debug_boot_arg; void machine_conf(void); @@ -79,20 +77,6 @@ machine_startup(__unused boot_args * args) { int boot_arg; -#if MACH_KDP - if (PE_parse_boot_argn("debug", &debug_boot_arg, sizeof (debug_boot_arg)) && - debug_enabled) { -#if DEVELOPMENT || DEBUG - if (debug_boot_arg & DB_HALT) - halt_in_debugger = 1; -#endif - if (debug_boot_arg & DB_NMI) - panicDebugging = TRUE; - } else { - debug_boot_arg = 0; - } -#endif - PE_parse_boot_argn("assert", &mach_assert, sizeof (mach_assert)); if (PE_parse_boot_argn("preempt", &boot_arg, sizeof (boot_arg))) { @@ -668,12 +652,13 @@ cause_ast_check( boolean_t ml_at_interrupt_context(void) { - vm_offset_t stack_ptr; - vm_offset_t intstack_top_ptr; + boolean_t at_interrupt_context = FALSE; - __asm__ volatile("mov %0, sp\n":"=r"(stack_ptr)); - intstack_top_ptr = getCpuDatap()->intstack_top; - return ((stack_ptr < intstack_top_ptr) && (stack_ptr > intstack_top_ptr - INTSTACK_SIZE)); + disable_preemption(); + at_interrupt_context = (getCpuDatap()->cpu_int_state != NULL); + enable_preemption(); + + return 
at_interrupt_context; } extern uint32_t cpu_idle_count; @@ -987,8 +972,10 @@ vm_offset_t ml_stack_remaining(void) { uintptr_t local = (uintptr_t) &local; + vm_offset_t intstack_top_ptr; - if (ml_at_interrupt_context()) { + intstack_top_ptr = getCpuDatap()->intstack_top; + if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) { return (local - (getCpuDatap()->intstack_top - INTSTACK_SIZE)); } else { return (local - current_thread()->kernel_stack); diff --git a/osfmk/arm/pmap.h b/osfmk/arm/pmap.h index 349ebc17f..75ed29d26 100644 --- a/osfmk/arm/pmap.h +++ b/osfmk/arm/pmap.h @@ -160,7 +160,6 @@ extern void flush_mmu_tlb_region(vm_offset_t va, unsigned length); #if defined(__arm64__) extern uint64_t get_mmu_control(void); -extern void set_mmu_control(uint64_t); extern uint64_t get_aux_control(void); extern void set_aux_control(uint64_t); extern void set_mmu_ttb(uint64_t); diff --git a/osfmk/arm64/copyio.c b/osfmk/arm64/copyio.c index 7d7974d5d..2e47825dd 100644 --- a/osfmk/arm64/copyio.c +++ b/osfmk/arm64/copyio.c @@ -40,7 +40,6 @@ extern int _bcopyout(const char *src, char *dst, vm_size_t len); extern int _copyin_word(const char *src, uint64_t *dst, vm_size_t len); extern pmap_t kernel_pmap; -extern boolean_t arm_pan_enabled; typedef enum copyio_type { COPYIO_IN, @@ -65,9 +64,7 @@ static inline void user_access_enable(void) { #if __ARM_PAN_AVAILABLE__ - if (arm_pan_enabled) { - __builtin_arm_wsr("pan", 0); - } + __builtin_arm_wsr("pan", 0); #endif /* __ARM_PAN_AVAILABLE__ */ } @@ -75,9 +72,7 @@ static inline void user_access_disable(void) { #if __ARM_PAN_AVAILABLE__ - if (arm_pan_enabled) { - __builtin_arm_wsr("pan", 1); - } + __builtin_arm_wsr("pan", 1); #endif /* __ARM_PAN_AVAILABLE__ */ } @@ -110,7 +105,7 @@ copyio(copyio_type_t copytype, const char *src, char *dst, } #endif - user_access_enable(); + user_access_enable(); /* Select copy routines based on direction: * COPYIO_IN - Use unprivileged loads to read from user address @@ -137,7 
+132,7 @@ copyio(copyio_type_t copytype, const char *src, char *dst, result = EINVAL; } - user_access_disable(); + user_access_disable(); return result; } diff --git a/osfmk/arm64/machine_routines.c b/osfmk/arm64/machine_routines.c index c4e6ba138..fefbaa51b 100644 --- a/osfmk/arm64/machine_routines.c +++ b/osfmk/arm64/machine_routines.c @@ -51,7 +51,6 @@ #include #include -#include #if defined(KERNEL_INTEGRITY_KTRR) #include @@ -73,7 +72,6 @@ boolean_t is_clock_configured = FALSE; extern int mach_assert; extern volatile uint32_t debug_enabled; -SECURITY_READ_ONLY_LATE(unsigned int) debug_boot_arg; void machine_conf(void); @@ -420,19 +418,6 @@ machine_startup(__unused boot_args * args) int boot_arg; -#if MACH_KDP - if (PE_parse_boot_argn("debug", &debug_boot_arg, sizeof (debug_boot_arg)) && - debug_enabled) { - if (debug_boot_arg & DB_HALT) - halt_in_debugger = 1; - if (debug_boot_arg & DB_NMI) - panicDebugging = TRUE; - } else { - debug_boot_arg = 0; - } - -#endif - PE_parse_boot_argn("assert", &mach_assert, sizeof (mach_assert)); if (PE_parse_boot_argn("preempt", &boot_arg, sizeof (boot_arg))) { diff --git a/osfmk/arm64/machine_routines_asm.s b/osfmk/arm64/machine_routines_asm.s index 915dffe57..c23ec81da 100644 --- a/osfmk/arm64/machine_routines_asm.s +++ b/osfmk/arm64/machine_routines_asm.s @@ -277,20 +277,6 @@ LEXT(set_aux_control) isb sy ret -#if (DEVELOPMENT || DEBUG) -/* - * set MMU control register - */ - .text - .align 2 - .globl EXT(set_mmu_control) -LEXT(set_mmu_control) - msr SCTLR_EL1, x0 - dsb sy - isb sy - ret -#endif - /* * set translation control register diff --git a/osfmk/arm64/platform_tests.c b/osfmk/arm64/platform_tests.c index 0c20f8a02..689250acf 100644 --- a/osfmk/arm64/platform_tests.c +++ b/osfmk/arm64/platform_tests.c @@ -79,7 +79,6 @@ #include #include -extern boolean_t arm_pan_enabled; kern_return_t arm64_lock_test(void); kern_return_t arm64_munger_test(void); kern_return_t ex_cb_test(void); @@ -87,7 +86,11 @@ kern_return_t 
arm64_pan_test(void); // exception handler ignores this fault address during PAN test #if __ARM_PAN_AVAILABLE__ -vm_offset_t pan_test_addr; +const uint64_t pan_ro_value = 0xFEEDB0B0DEADBEEF; +vm_offset_t pan_test_addr = 0; +vm_offset_t pan_ro_addr = 0; +volatile int pan_exception_level = 0; +volatile char pan_fault_value = 0; #endif #include @@ -1037,19 +1040,14 @@ ex_cb_test() kern_return_t arm64_pan_test() { - unsigned long last_pan_config; vm_offset_t priv_addr = _COMM_PAGE_SIGNATURE; T_LOG("Testing PAN."); - last_pan_config = __builtin_arm_rsr("pan"); - if (!last_pan_config) { - T_ASSERT(!arm_pan_enabled, "PAN is not enabled even though it is configured to be"); - __builtin_arm_wsr("pan", 1); - } - T_ASSERT(__builtin_arm_rsr("pan") != 0, NULL); + pan_exception_level = 0; + pan_fault_value = 0xDE; // convert priv_addr to one that is accessible from user mode pan_test_addr = priv_addr + _COMM_PAGE64_BASE_ADDRESS - _COMM_PAGE_START_ADDRESS; @@ -1059,14 +1057,31 @@ arm64_pan_test() // The exception handler, upon recognizing the fault address is pan_test_addr, // will disable PAN and rerun this instruction successfully T_ASSERT(*(char *)pan_test_addr == *(char *)priv_addr, NULL); - pan_test_addr = 0; + + T_ASSERT(pan_exception_level == 2, NULL); T_ASSERT(__builtin_arm_rsr("pan") == 0, NULL); - // restore previous PAN config value - if (last_pan_config) - __builtin_arm_wsr("pan", 1); + T_ASSERT(pan_fault_value == *(char *)priv_addr, NULL); + + pan_exception_level = 0; + pan_fault_value = 0xAD; + pan_ro_addr = (vm_offset_t) &pan_ro_value; + + // Force a permission fault while PAN is disabled to make sure PAN is + // re-enabled during the exception handler. 
+ *((volatile uint64_t*)pan_ro_addr) = 0xFEEDFACECAFECAFE; + + T_ASSERT(pan_exception_level == 2, NULL); + + T_ASSERT(__builtin_arm_rsr("pan") == 0, NULL); + + T_ASSERT(pan_fault_value == *(char *)priv_addr, NULL); + + pan_test_addr = 0; + pan_ro_addr = 0; + __builtin_arm_wsr("pan", 1); return KERN_SUCCESS; } #endif diff --git a/osfmk/arm64/proc_reg.h b/osfmk/arm64/proc_reg.h index 370ed6347..55c370635 100644 --- a/osfmk/arm64/proc_reg.h +++ b/osfmk/arm64/proc_reg.h @@ -227,11 +227,8 @@ #define SCTLR_PAC_DEFAULT 0 #define SCTLR_EL1_DEFAULT (SCTLR_PAC_DEFAULT | SCTLR_RESERVED | SCTLR_UCI_ENABLED | SCTLR_nTWE_WFE_ENABLED | SCTLR_DZE_ENABLED | \ - SCTLR_I_ENABLED | SCTLR_SED_DISABLED | SCTLR_CP15BEN_ENABLED | \ - SCTLR_SA0_ENABLED | SCTLR_SA_ENABLED | SCTLR_PAN_UNCHANGED | \ - SCTLR_C_ENABLED | SCTLR_M_ENABLED) - - + SCTLR_I_ENABLED | SCTLR_SED_DISABLED | SCTLR_CP15BEN_ENABLED | \ + SCTLR_SA0_ENABLED | SCTLR_SA_ENABLED | SCTLR_C_ENABLED | SCTLR_M_ENABLED) /* * Coprocessor Access Control Register (CPACR) diff --git a/osfmk/arm64/sleh.c b/osfmk/arm64/sleh.c index ba7484a83..f77de678b 100644 --- a/osfmk/arm64/sleh.c +++ b/osfmk/arm64/sleh.c @@ -179,7 +179,6 @@ extern boolean_t pgtrace_enabled; #endif #if __ARM_PAN_AVAILABLE__ -extern boolean_t arm_pan_enabled; #endif #if defined(APPLECYCLONE) @@ -1086,6 +1085,13 @@ handle_kernel_abort(arm_saved_state_t *state, uint32_t esr, vm_offset_t fault_ad vm_map_t map; int interruptible; + /* + * Ensure no faults in the physical aperture. This could happen if + * a page table is incorrectly allocated from the read only region + * when running with KTRR. 
+ */ + + if (fault_addr >= gVirtBase && fault_addr < (gVirtBase+gPhysSize)) { panic_with_thread_kernel_state("Unexpected fault in kernel static region\n",state); } @@ -1139,8 +1145,8 @@ handle_kernel_abort(arm_saved_state_t *state, uint32_t esr, vm_offset_t fault_ad #endif #if CONFIG_PGTRACE - } else if (ml_at_interrupt_context()) { - panic_with_thread_kernel_state("Unexpected abort while on interrupt stack.", state); + } else if (ml_at_interrupt_context()) { + panic_with_thread_kernel_state("Unexpected abort while on interrupt stack.", state); #endif } else if (is_alignment_fault(fault_code)) { panic_with_thread_kernel_state("Unaligned kernel data abort.", state); diff --git a/osfmk/atm/atm_types.h b/osfmk/atm/atm_types.h index 36f2f2dad..2169e6c0e 100644 --- a/osfmk/atm/atm_types.h +++ b/osfmk/atm/atm_types.h @@ -66,7 +66,7 @@ typedef uint64_t *atm_memory_size_array_t; #define ATM_SUBAID32_MAX (UINT32_MAX) #define ATM_TRACE_DISABLE (0x0100) /* OS_TRACE_MODE_DISABLE - Do not initialize the new logging*/ #define ATM_TRACE_OFF (0x0400) /* OS_TRACE_MODE_OFF - Don't drop log messages to new log buffers */ -#define ATM_ENABLE_LEGACY_LOGGING (0x0200) /* OS_TRACE_SYSTEMMODE_LEGACY_LOGGING - Enable legacy logging */ +#define ATM_ENABLE_LEGACY_LOGGING (0x20000000) /* OS_TRACE_SYSTEMMODE_LEGACY_LOGGING - Enable legacy logging */ #endif /* _ATM_ATM_TYPES_H_ */ diff --git a/osfmk/i386/AT386/model_dep.c b/osfmk/i386/AT386/model_dep.c index 99b974a13..2d29bf968 100644 --- a/osfmk/i386/AT386/model_dep.c +++ b/osfmk/i386/AT386/model_dep.c @@ -127,7 +127,6 @@ #include #include -#include #if DEBUG || DEVELOPMENT #define DPRINTF(x...) 
kprintf(x) @@ -173,22 +172,20 @@ static unsigned panic_io_port; static unsigned commit_paniclog_to_nvram; boolean_t coprocessor_paniclog_flush = FALSE; -#if DEVELOPMENT || DEBUG struct kcdata_descriptor kc_panic_data; static boolean_t begun_panic_stackshot = FALSE; - -vm_offset_t panic_stackshot_buf = 0; -size_t panic_stackshot_len = 0; - extern kern_return_t do_stackshot(void *); + extern void kdp_snapshot_preflight(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t flags, kcdata_descriptor_t data_p, boolean_t enable_faulting); extern int kdp_stack_snapshot_bytes_traced(void); -#endif -SECURITY_READ_ONLY_LATE(unsigned int) debug_boot_arg; +#if DEVELOPMENT || DEBUG +vm_offset_t panic_stackshot_buf = 0; +size_t panic_stackshot_len = 0; +#endif /* * Backtrace a single frame. @@ -279,20 +276,6 @@ machine_startup(void) halt_in_debugger = halt_in_debugger ? 0 : 1; #endif - if (PE_parse_boot_argn("debug", &debug_boot_arg, sizeof (debug_boot_arg))) { - panicDebugging = TRUE; -#if DEVELOPMENT || DEBUG - if (debug_boot_arg & DB_HALT) halt_in_debugger=1; -#endif -#if KDEBUG_MOJO_TRACE - if (debug_boot_arg & DB_PRT_KDEBUG) { - kdebug_serial = TRUE; - } -#endif - } else { - debug_boot_arg = 0; - } - if (!PE_parse_boot_argn("nvram_paniclog", &commit_paniclog_to_nvram, sizeof (commit_paniclog_to_nvram))) commit_paniclog_to_nvram = 1; @@ -303,9 +286,6 @@ machine_startup(void) if (PE_parse_boot_argn("pmsafe_debug", &boot_arg, sizeof (boot_arg))) pmsafe_debug = boot_arg; -#if NOTYET - hw_lock_init(&debugger_lock); /* initialize debugger lock */ -#endif hw_lock_init(&pbtlock); /* initialize print backtrace lock */ if (PE_parse_boot_argn("preempt", &boot_arg, sizeof (boot_arg))) { @@ -857,26 +837,116 @@ uint64_t panic_restart_timeout = ~(0ULL); #define PANIC_RESTART_TIMEOUT (3ULL * NSEC_PER_SEC) +/* + * We should always return from this function with the other log offset + * set in the panic_info structure. 
+ */ void RecordPanicStackshot() { -#if DEVELOPMENT || DEBUG - int err = 0, bytes_traced = 0, bytes_used = 0; - /* Try to take a stackshot once at panic time */ + int err = 0, bytes_traced = 0, bytes_used = 0, bytes_remaining = 0; + char *stackshot_begin_loc = NULL; + + /* Don't re-enter this code if we panic here */ if (begun_panic_stackshot) { + if (panic_info->mph_other_log_offset == 0) { + panic_info->mph_other_log_offset = PE_get_offset_into_panic_region(debug_buf_ptr); + } return; } begun_panic_stackshot = TRUE; + /* The panic log length should have been set before we came to capture a stackshot */ + if (panic_info->mph_panic_log_len == 0) { + kdb_printf("Found zero length panic log, skipping capturing panic stackshot\n"); + if (panic_info->mph_other_log_offset == 0) { + panic_info->mph_other_log_offset = PE_get_offset_into_panic_region(debug_buf_ptr); + } + return; + } + + /* + * Try to capture an in memory panic_stackshot (enabled during boot + * on systems with co-processors). + */ + if (extended_debug_log_enabled) { + if (stackshot_active()) { + panic_info->mph_panic_flags |= MACOS_PANIC_HEADER_FLAG_STACKSHOT_FAILED_NESTED; + panic_info->mph_other_log_offset = PE_get_offset_into_panic_region(debug_buf_ptr); + kdb_printf("Panicked during stackshot, skipping panic stackshot\n"); + return; + } else { + stackshot_begin_loc = debug_buf_ptr; + + bytes_remaining = debug_buf_size - (unsigned int)((uintptr_t)stackshot_begin_loc - (uintptr_t)debug_buf_base); + err = kcdata_memory_static_init(&kc_panic_data, (mach_vm_address_t)stackshot_begin_loc, + KCDATA_BUFFER_BEGIN_STACKSHOT, bytes_remaining, KCFLAG_USE_MEMCOPY); + if (err != KERN_SUCCESS) { + panic_info->mph_panic_flags |= MACOS_PANIC_HEADER_FLAG_STACKSHOT_FAILED_ERROR; + panic_info->mph_other_log_offset = PE_get_offset_into_panic_region(debug_buf_ptr); + kdb_printf("Failed to initialize kcdata buffer for in-memory panic stackshot, skipping ...\n"); + return; + } + + kdp_snapshot_preflight(-1, (void *) 
stackshot_begin_loc, bytes_remaining, + (STACKSHOT_KCDATA_FORMAT | STACKSHOT_NO_IO_STATS | STACKSHOT_SAVE_KEXT_LOADINFO | + STACKSHOT_ACTIVE_KERNEL_THREADS_ONLY | STACKSHOT_FROM_PANIC | STACKSHOT_THREAD_WAITINFO), &kc_panic_data, 0); + err = do_stackshot(NULL); + bytes_traced = (int) kdp_stack_snapshot_bytes_traced(); + if (bytes_traced > 0 && !err) { + debug_buf_ptr += bytes_traced; + panic_info->mph_panic_flags |= MACOS_PANIC_HEADER_FLAG_STACKSHOT_SUCCEEDED; + panic_info->mph_stackshot_offset = PE_get_offset_into_panic_region(stackshot_begin_loc); + panic_info->mph_stackshot_len = bytes_traced; + + panic_info->mph_other_log_offset = PE_get_offset_into_panic_region(debug_buf_ptr); + kdb_printf("\n** In Memory Panic Stackshot Succeeded ** Bytes Traced %d **\n", bytes_traced); + } else { + bytes_used = (int) kcdata_memory_get_used_bytes(&kc_panic_data); + if (bytes_used > 0) { + /* Zero out the stackshot data */ + bzero(stackshot_begin_loc, bytes_used); + panic_info->mph_panic_flags |= MACOS_PANIC_HEADER_FLAG_STACKSHOT_FAILED_INCOMPLETE; + + panic_info->mph_other_log_offset = PE_get_offset_into_panic_region(debug_buf_ptr); + kdb_printf("\n** In Memory Panic Stackshot Incomplete ** Bytes Filled %d ** Err %d\n", bytes_used, err); + } else { + bzero(stackshot_begin_loc, bytes_used); + panic_info->mph_panic_flags |= MACOS_PANIC_HEADER_FLAG_STACKSHOT_FAILED_ERROR; + + panic_info->mph_other_log_offset = PE_get_offset_into_panic_region(debug_buf_ptr); + kdb_printf("\n** In Memory Panic Stackshot Failed ** Bytes Traced %d, err %d\n", bytes_traced, err); + } + } + } + + paniclog_flush(); +#if DEVELOPMENT || DEBUG + if (panic_stackshot_buf != 0) { + // We're going to try to take another stackshot, reset the state. 
+ panic_stackshot_reset_state(); + } +#endif /* DEVELOPMENT || DEBUG */ + } else { + panic_info->mph_other_log_offset = PE_get_offset_into_panic_region(debug_buf_ptr); + } + +#if DEVELOPMENT || DEBUG + if (panic_stackshot_buf == 0) { - kdb_printf("No stackshot buffer allocated, skipping...\n"); + kdb_printf("No stackshot buffer allocated for file backed panic stackshot, skipping...\n"); + return; + } + + if (stackshot_active()) { + kdb_printf("Panicked during stackshot, skipping file backed panic stackshot\n"); return; } err = kcdata_memory_static_init(&kc_panic_data, (mach_vm_address_t)panic_stackshot_buf, KCDATA_BUFFER_BEGIN_STACKSHOT, PANIC_STACKSHOT_BUFSIZE, KCFLAG_USE_MEMCOPY); if (err != KERN_SUCCESS) { - kdb_printf("Failed to initialize kcdata buffer for panic stackshot, skipping ...\n"); + kdb_printf("Failed to initialize kcdata buffer for file backed panic stackshot, skipping ...\n"); return; } @@ -887,17 +957,17 @@ RecordPanicStackshot() bytes_traced = (int) kdp_stack_snapshot_bytes_traced(); if (bytes_traced > 0 && !err) { panic_stackshot_len = bytes_traced; - kdb_printf("Panic stackshot succeeded, length: %u bytes\n", bytes_traced); + kdb_printf("File backed panic stackshot succeeded, length: %u bytes\n", bytes_traced); } else { bytes_used = (int) kcdata_memory_get_used_bytes(&kc_panic_data); if (bytes_used > 0) { - kdb_printf("Panic stackshot incomplete, consumed %u bytes\n", bytes_used); + kdb_printf("File backed panic stackshot incomplete, consumed %u bytes, error : %d \n", bytes_used, err); } else { - kdb_printf("Panic stackshot incomplete, consumed %u bytes, error : %d \n", bytes_used, err); + kdb_printf("File backed panic stackshot incomplete, consumed %u bytes, error : %d \n", bytes_used, err); } } - #endif /* DEVELOPMENT || DEBUG */ + return; } @@ -917,7 +987,7 @@ SavePanicInfo( /* Obtain current frame pointer */ __asm__ volatile("movq %%rbp, %0" : "=m" (stackptr)); - /* Print backtrace - callee is internally synchronized */ + /* Print backtrace 
- callee is internally synchronized */ if (panic_options & DEBUGGER_OPTION_INITPROC_PANIC) { /* Special handling of launchd died panics */ print_launchd_info(); @@ -929,11 +999,27 @@ SavePanicInfo( panic_info->mph_panic_flags |= MACOS_PANIC_HEADER_FLAG_COPROC_INITIATED_PANIC; } - /* Flush the paniclog */ + if (PE_get_offset_into_panic_region(debug_buf_ptr) < panic_info->mph_panic_log_offset) { + kdb_printf("Invalid panic log offset found (not properly initialized?): debug_buf_ptr : 0x%p, panic_info: 0x%p mph_panic_log_offset: 0x%x\n", + debug_buf_ptr, panic_info, panic_info->mph_panic_log_offset); + panic_info->mph_panic_log_len = 0; + } else { + panic_info->mph_panic_log_len = PE_get_offset_into_panic_region(debug_buf_ptr) - panic_info->mph_panic_log_offset; + } + + /* Flush the panic log */ paniclog_flush(); /* Try to take a panic stackshot */ RecordPanicStackshot(); + + /* + * Flush the panic log again with the stackshot or any relevant logging + * from when we tried to capture it. + */ + if (extended_debug_log_enabled) { + paniclog_flush(); + } } void @@ -942,25 +1028,39 @@ paniclog_flush() unsigned long pi_size = 0; assert(panic_info != NULL); - panic_info->mph_panic_log_len = PE_get_offset_into_panic_region(debug_buf_ptr) - panic_info->mph_panic_log_offset; + + /* Update the other log offset if we've opened the other log */ + if (panic_info->mph_other_log_offset != 0) { + panic_info->mph_other_log_len = PE_get_offset_into_panic_region(debug_buf_ptr) - panic_info->mph_other_log_offset; + } /* - * If we've detected that we're on a co-processor system we flush the panic log via the kPEPanicSync + * If we've detected that we're on a co-processor system, we flush the panic log via the kPEPanicSync * panic callbacks, otherwise we flush via nvram (unless that has been disabled). 
*/ if (coprocessor_paniclog_flush) { - /* Only need to calculate the CRC for co-processor platforms */ - panic_info->mph_crc = crc32(0L, &panic_info->mph_version, (debug_buf_size - offsetof(struct macos_panic_header, mph_version))); + unsigned int size_to_flush = debug_buf_size; + if (extended_debug_log_enabled) { + /* + * debug_buf_size for the extended log does not include the length of the header. + * There may be some extra data at the end of the 'basic' log that wouldn't get flushed + * for the non-extended case (this is a concession we make to not shrink the paniclog data + * for non-coprocessor systems that only use the basic log). + */ + size_to_flush = debug_buf_size + sizeof(struct macos_panic_header); + } + + /* We need to calculate the CRC for co-processor platforms */ + panic_info->mph_crc = crc32(0L, &panic_info->mph_version, (size_to_flush - offsetof(struct macos_panic_header, mph_version))); - PESavePanicInfoAction(debug_buf, debug_buf_size); - } else if(commit_paniclog_to_nvram) { + PESavePanicInfoAction(panic_info, size_to_flush); + } else if (commit_paniclog_to_nvram) { assert(debug_buf_size != 0); unsigned int bufpos; uintptr_t cr0; debug_putc(0); - /* * Now call the compressor * XXX Consider using the WKdm compressor in the diff --git a/osfmk/i386/fpu.c b/osfmk/i386/fpu.c index 41c2cad86..083882238 100644 --- a/osfmk/i386/fpu.c +++ b/osfmk/i386/fpu.c @@ -1271,6 +1271,8 @@ fpextovrflt(void) /*NOTREACHED*/ } +extern void fpxlog(int, uint32_t, uint32_t, uint32_t); + /* * FPU error. Called by AST. */ @@ -1296,6 +1298,11 @@ fpexterrflt(void) (void)ml_set_interrupts_enabled(intr); + const uint32_t mask = ifps->fx_control & + (FPC_IM | FPC_DM | FPC_ZM | FPC_OM | FPC_UE | FPC_PE); + const uint32_t xcpt = ~mask & (ifps->fx_status & + (FPS_IE | FPS_DE | FPS_ZE | FPS_OE | FPS_UE | FPS_PE)); + fpxlog(EXC_I386_EXTERR, ifps->fx_status, ifps->fx_control, xcpt); /* * Raise FPU exception. 
* Locking not needed on pcb->ifps, @@ -1393,6 +1400,11 @@ fpSSEexterrflt(void) * Locking not needed on pcb->ifps, * since thread is running. */ + const uint32_t mask = (ifps->fx_MXCSR >> 7) & + (FPC_IM | FPC_DM | FPC_ZM | FPC_OM | FPC_UE | FPC_PE); + const uint32_t xcpt = ~mask & (ifps->fx_MXCSR & + (FPS_IE | FPS_DE | FPS_ZE | FPS_OE | FPS_UE | FPS_PE)); + fpxlog(EXC_I386_SSEEXTERR, ifps->fx_MXCSR, ifps->fx_MXCSR, xcpt); i386_exception(EXC_ARITHMETIC, EXC_I386_SSEEXTERR, @@ -1411,22 +1423,38 @@ fpSSEexterrflt(void) static void fpu_savearea_promote_avx512(thread_t thread) { - struct x86_avx_thread_state *ifps; - struct x86_avx512_thread_state *ifps512; + struct x86_avx_thread_state *ifps = NULL; + struct x86_avx512_thread_state *ifps512 = NULL; pcb_t pcb = THREAD_TO_PCB(thread); + boolean_t do_avx512_alloc = FALSE; DBG("fpu_upgrade_savearea(%p)\n", thread); - ifps512 = fp_state_alloc(AVX512); + simple_lock(&pcb->lock); + ifps = pcb->ifps; if (ifps == NULL) { - /* nothing to be done */ + pcb->xstate = AVX512; simple_unlock(&pcb->lock); - fp_state_free(ifps512, AVX512); - xsetbv(0, AVX512_XMASK); - DBG("fpu_upgrade_savearea() NULL ifps\n"); + if (thread != current_thread()) { + /* nothing to be done */ + + return; + } + fpnoextflt(); return; } + + if (pcb->xstate != AVX512) { + do_avx512_alloc = TRUE; + } + simple_unlock(&pcb->lock); + + if (do_avx512_alloc == TRUE) { + ifps512 = fp_state_alloc(AVX512); + } + + simple_lock(&pcb->lock); if (thread == current_thread()) { boolean_t intr; @@ -1443,12 +1471,25 @@ fpu_savearea_promote_avx512(thread_t thread) assert(ifps->fp.fp_valid); /* Allocate an AVX512 savearea and copy AVX state into it */ - bcopy(ifps, ifps512, fp_state_size[AVX]); - pcb->ifps = ifps512; - pcb->xstate = AVX512; - fp_state_free(ifps, AVX); - + if (pcb->xstate != AVX512) { + bcopy(ifps, ifps512, fp_state_size[AVX]); + pcb->ifps = ifps512; + pcb->xstate = AVX512; + ifps512 = NULL; + } else { + ifps = NULL; + } + /* The PCB lock is redundant in some 
scenarios given the higher level + * thread mutex, but its pre-emption disablement is relied upon here + */ simple_unlock(&pcb->lock); + + if (ifps) { + fp_state_free(ifps, AVX); + } + if (ifps512) { + fp_state_free(ifps, AVX512); + } } /* @@ -1481,17 +1522,26 @@ fpu_thread_promote_avx512(thread_t thread) * return directly via thread_exception_return(). * Otherwise simply return. */ +#define MAX_X86_INSN_LENGTH (16) void fpUDflt(user_addr_t rip) { uint8_t instruction_prefix; boolean_t is_AVX512_instruction = FALSE; - + user_addr_t original_rip = rip; do { - if (copyin(rip, (char *) &instruction_prefix, 1)) + /* TODO: as an optimisation, copy up to the lesser of the + * next page boundary or maximal prefix length in one pass + * rather than issue multiple copyins + */ + if (copyin(rip, (char *) &instruction_prefix, 1)) { return; + } DBG("fpUDflt(0x%016llx) prefix: 0x%x\n", rip, instruction_prefix); + /* TODO: determine more specifically which prefixes + * are sane possibilities for AVX512 insns + */ switch (instruction_prefix) { case 0x2E: /* CS segment override */ case 0x36: /* SS segment override */ @@ -1499,9 +1549,13 @@ fpUDflt(user_addr_t rip) case 0x26: /* ES segment override */ case 0x64: /* FS segment override */ case 0x65: /* GS segment override */ + case 0x66: /* Operand-size override */ case 0x67: /* address-size override */ /* Skip optional prefixes */ rip++; + if ((rip - original_rip) > MAX_X86_INSN_LENGTH) { + return; + } break; case 0x62: /* EVEX */ case 0xC5: /* VEX 2-byte */ @@ -1516,11 +1570,9 @@ fpUDflt(user_addr_t rip) /* Here if we detect attempted execution of an AVX512 instruction */ /* - * Fail if this machine doesn't support AVX512 or - * the current thread is (strangely) already in AVX512 mode. 
+ * Fail if this machine doesn't support AVX512 */ - if (fpu_capability != AVX512 || - current_xstate() == AVX512) + if (fpu_capability != AVX512) return; assert(xgetbv(XCR0) == AVX_XMASK); diff --git a/osfmk/ipc/ipc_entry.c b/osfmk/ipc/ipc_entry.c index 9604a81b8..677e054ea 100644 --- a/osfmk/ipc/ipc_entry.c +++ b/osfmk/ipc/ipc_entry.c @@ -302,6 +302,9 @@ ipc_entry_alloc_name( mach_port_index_t index = MACH_PORT_INDEX(name); mach_port_gen_t gen = MACH_PORT_GEN(name); + if (index > ipc_table_max_entries()) + return KERN_NO_SPACE; + assert(MACH_PORT_VALID(name)); diff --git a/osfmk/ipc/ipc_table.c b/osfmk/ipc/ipc_table.c index b65071b28..76bc0254a 100644 --- a/osfmk/ipc/ipc_table.c +++ b/osfmk/ipc/ipc_table.c @@ -71,7 +71,7 @@ #include #include -ipc_table_size_t ipc_table_entries; +ipc_table_size_t ipc_table_entries = NULL; unsigned int ipc_table_entries_size = CONFIG_IPC_TABLE_ENTRIES_STEPS; ipc_table_size_t ipc_table_requests; @@ -148,6 +148,41 @@ ipc_table_init(void) ipc_table_requests[ipc_table_requests_size - 1].its_size = 0; } + +/* + * Routine: ipc_table_max_entries + * Purpose: + * returns the maximum number of entries an IPC space + * is allowed to contain (the maximum size to which it will grow) + * Conditions: + * none + */ +unsigned int +ipc_table_max_entries(void) +{ + if (!ipc_table_entries || ipc_table_entries_size < 2) + return 0; + return (unsigned int)ipc_table_entries[ipc_table_entries_size - 1].its_size; +} + + +/* + * Routine: ipc_table_max_requests + * Purpose: + * returns the maximum number of requests an IPC request table + * is allowed to contain (the maximum size to which it will grow) + * Conditions: + * none + */ +unsigned int +ipc_table_max_requests(void) +{ + if (!ipc_table_requests || ipc_table_requests_size < 2) + return 0; + return (unsigned int)ipc_table_requests[ipc_table_requests_size - 2].its_size; +} + + /* * Routine: ipc_table_alloc * Purpose: diff --git a/osfmk/ipc/ipc_table.h b/osfmk/ipc/ipc_table.h index 66a2d3d5a..2b092bbca 
100644 --- a/osfmk/ipc/ipc_table.h +++ b/osfmk/ipc/ipc_table.h @@ -145,4 +145,7 @@ extern void ipc_table_free( sizeof(struct ipc_port_request), \ (void *)(table)) +extern unsigned int ipc_table_max_entries(void); +extern unsigned int ipc_table_max_requests(void); + #endif /* _IPC_IPC_TABLE_H_ */ diff --git a/osfmk/kdp/kdp_core.c b/osfmk/kdp/kdp_core.c index 186e2e7f0..5cf505770 100644 --- a/osfmk/kdp/kdp_core.c +++ b/osfmk/kdp/kdp_core.c @@ -1148,7 +1148,8 @@ do_kern_dump(kern_dump_output_proc outproc, enum kern_dump_type kd_variant) existing_log_size = (panic_info->eph_panic_log_offset - sizeof(struct embedded_panic_header)) + panic_info->eph_panic_log_len + panic_info->eph_other_log_len; #else /* CONFIG_EMBEDDED */ - existing_log_size = log_start - debug_buf_base; + existing_log_size = (panic_info->mph_panic_log_offset - sizeof(struct macos_panic_header)) + + panic_info->mph_panic_log_len + panic_info->mph_other_log_len; #endif /* CONFIG_EMBEDDED */ assert (existing_log_size <= debug_buf_size); @@ -1256,9 +1257,11 @@ do_kern_dump(kern_dump_output_proc outproc, enum kern_dump_type kd_variant) new_log_len = KERN_COREDUMP_MAXDEBUGLOGSIZE; } -#if CONFIG_EMBEDDED /* This data is after the panic stackshot, we need to write it separately */ +#if CONFIG_EMBEDDED existing_log_size -= panic_info->eph_other_log_len; +#else + existing_log_size -= panic_info->mph_other_log_len; #endif /* @@ -1273,13 +1276,17 @@ do_kern_dump(kern_dump_output_proc outproc, enum kern_dump_type kd_variant) goto exit; } + /* + * The next part of the log we're interested in is the beginning of the 'other' log. 
+ * Include any data after the panic stackshot but before we started the coredump log + * (see above) + */ #if CONFIG_EMBEDDED - /* The next part of the log we're interested in is the beginning of the 'other' log */ buf = (char *)(((char *)panic_info) + (uintptr_t) panic_info->eph_other_log_offset); - /* Include any data after the panic stackshot but before we started the coredump log (see above) */ new_log_len += panic_info->eph_other_log_len; #else /* CONFIG_EMBEDDED */ - buf += existing_log_size; + buf = (char *)(((char *)panic_info) + (uintptr_t) panic_info->mph_other_log_offset); + new_log_len += panic_info->mph_other_log_len; #endif /* CONFIG_EMBEDDED */ /* Write the coredump log */ @@ -1301,6 +1308,16 @@ do_kern_dump(kern_dump_output_proc outproc, enum kern_dump_type kd_variant) dump_succeeded = FALSE; } +#if CONFIG_EMBEDDED + panic_info->eph_panic_flags |= (dump_succeeded ? EMBEDDED_PANIC_HEADER_FLAG_COREDUMP_COMPLETE : + EMBEDDED_PANIC_HEADER_FLAG_COREDUMP_FAILED); +#else + panic_info->mph_panic_flags |= (dump_succeeded ? MACOS_PANIC_HEADER_FLAG_COREDUMP_COMPLETE : + MACOS_PANIC_HEADER_FLAG_COREDUMP_FAILED); +#endif + /* We touched the panic header, flush it so we update the CRC */ + paniclog_flush(); + return (dump_succeeded ? 
0 : -1); } diff --git a/osfmk/kern/clock.c b/osfmk/kern/clock.c index 0a256f220..d601291b6 100644 --- a/osfmk/kern/clock.c +++ b/osfmk/kern/clock.c @@ -84,9 +84,10 @@ #include #include #include +#include uint32_t hz_tick_interval = 1; - +static uint64_t has_monotonic_clock = 0; decl_simple_lock_data(,clock_lock) lck_grp_attr_t * settime_lock_grp_attr; @@ -237,6 +238,15 @@ bintime2absolutetime(const struct bintime *_bt, uint64_t *abs) nsec = (uint64_t) _bt->sec * (uint64_t)NSEC_PER_SEC + (((uint64_t)NSEC_PER_SEC * (uint32_t)(_bt->frac >> 32)) >> 32); nanoseconds_to_absolutetime(nsec, abs); } + +struct latched_time { + uint64_t monotonic_time_usec; + uint64_t mach_time; +}; + +extern int +kernel_sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); + /* * Time of day (calendar) variables. * @@ -270,8 +280,19 @@ clock_sec_t last_sys_sec = 0; clock_usec_t last_sys_usec = 0; #endif +#if DEVELOPMENT || DEBUG +extern int g_should_log_clock_adjustments; + +static void print_all_clock_variables(const char*, clock_sec_t* pmu_secs, clock_usec_t* pmu_usec, clock_sec_t* sys_secs, clock_usec_t* sys_usec, struct clock_calend* calend_cp); +static void print_all_clock_variables_internal(const char *, struct clock_calend* calend_cp); +#else +#define print_all_clock_variables(...) do { } while (0) +#define print_all_clock_variables_internal(...) do { } while (0) +#endif + #if CONFIG_DTRACE + /* * Unlocked calendar flipflop; this is used to track a clock_calend such * that we can safely access a snapshot of a valid clock_calend structure @@ -683,6 +704,24 @@ clock_gettimeofday_and_absolute_time( } } +static void +update_basesleep(struct bintime delta, bool forward) +{ + /* + * Update basesleep only if the platform does not have monotonic clock. + * In that case the sleep time computation will use the PMU time + * which offset gets modified by settimeofday. 
+ * We don't need this for mononic clock because in that case the sleep + * time computation is independent from the offset value of the PMU. + */ + if (!has_monotonic_clock) { + if (forward) + bintime_add(&clock_calend.basesleep, &delta); + else + bintime_sub(&clock_calend.basesleep, &delta); + } +} + /* * clock_set_calendar_microtime: * @@ -727,6 +766,9 @@ clock_set_calendar_microtime( s = splclock(); clock_lock(); +#if DEVELOPMENT || DEBUG + struct clock_calend clock_calend_cp = clock_calend; +#endif commpage_disable_timestamp(); /* @@ -734,29 +776,50 @@ clock_set_calendar_microtime( */ clock_get_calendar_absolute_and_microtime_locked(&oldsecs, &oldmicrosecs, &absolutesys); +#if DEVELOPMENT || DEBUG + if (g_should_log_clock_adjustments) { + os_log(OS_LOG_DEFAULT, "%s wall %lu s %d u computed with %llu abs\n", + __func__, (unsigned long)oldsecs, oldmicrosecs, absolutesys); + os_log(OS_LOG_DEFAULT, "%s requested %lu s %d u\n", + __func__, (unsigned long)secs, microsecs ); + } +#endif + if (oldsecs < secs || (oldsecs == secs && oldmicrosecs < microsecs)) { // moving forwards deltasecs = secs; deltamicrosecs = microsecs; TIME_SUB(deltasecs, oldsecs, deltamicrosecs, oldmicrosecs, USEC_PER_SEC); - TIME_ADD(clock_boottime, deltasecs, clock_boottime_usec, deltamicrosecs, USEC_PER_SEC); +#if DEVELOPMENT || DEBUG + if (g_should_log_clock_adjustments) { + os_log(OS_LOG_DEFAULT, "%s delta requested %lu s %d u\n", + __func__, (unsigned long)deltasecs, deltamicrosecs); + } +#endif + + TIME_ADD(clock_boottime, deltasecs, clock_boottime_usec, deltamicrosecs, USEC_PER_SEC); clock2bintime(&deltasecs, &deltamicrosecs, &bt); bintime_add(&clock_calend.boottime, &bt); - bintime_add(&clock_calend.basesleep, &bt); - + update_basesleep(bt, TRUE); } else { // moving backwards deltasecs = oldsecs; deltamicrosecs = oldmicrosecs; TIME_SUB(deltasecs, secs, deltamicrosecs, microsecs, USEC_PER_SEC); - TIME_SUB(clock_boottime, deltasecs, clock_boottime_usec, deltamicrosecs, USEC_PER_SEC); +#if 
DEVELOPMENT || DEBUG + if (g_should_log_clock_adjustments) { + os_log(OS_LOG_DEFAULT, "%s negative delta requested %lu s %d u\n", + __func__, (unsigned long)deltasecs, deltamicrosecs); + } +#endif + TIME_SUB(clock_boottime, deltasecs, clock_boottime_usec, deltamicrosecs, USEC_PER_SEC); clock2bintime(&deltasecs, &deltamicrosecs, &bt); bintime_sub(&clock_calend.boottime, &bt); - bintime_sub(&clock_calend.basesleep, &bt); + update_basesleep(bt, FALSE); } clock_calend.bintime = clock_calend.boottime; @@ -766,6 +829,10 @@ clock_set_calendar_microtime( clock_gettimeofday_set_commpage(absolutesys, bt.sec, bt.frac, clock_calend.tick_scale_x, ticks_per_sec); +#if DEVELOPMENT || DEBUG + struct clock_calend clock_calend_cp1 = clock_calend; +#endif + commpage_value = clock_boottime * USEC_PER_SEC + clock_boottime_usec; clock_unlock(); @@ -775,8 +842,22 @@ clock_set_calendar_microtime( * Set the new value for the platform clock. * This call might block, so interrupts must be enabled. */ +#if DEVELOPMENT || DEBUG + uint64_t now_b = mach_absolute_time(); +#endif + PESetUTCTimeOfDay(newsecs, newmicrosecs); +#if DEVELOPMENT || DEBUG + uint64_t now_a = mach_absolute_time(); + if (g_should_log_clock_adjustments) { + os_log(OS_LOG_DEFAULT, "%s mach bef PESet %llu mach aft %llu \n", __func__, now_b, now_a); + } +#endif + + print_all_clock_variables_internal(__func__, &clock_calend_cp); + print_all_clock_variables_internal(__func__, &clock_calend_cp1); + commpage_update_boottime(commpage_value); /* @@ -857,6 +938,12 @@ clock_update_calendar(void) */ ntp_update_second(&adjustment, clock_calend.bintime.sec); +#if DEVELOPMENT || DEBUG + if (g_should_log_clock_adjustments) { + os_log(OS_LOG_DEFAULT, "%s adjustment %lld\n", __func__, adjustment); + } +#endif + /* * recomputing scale factors. 
*/ @@ -864,20 +951,102 @@ clock_update_calendar(void) clock_gettimeofday_set_commpage(now, clock_calend.bintime.sec, clock_calend.bintime.frac, clock_calend.tick_scale_x, ticks_per_sec); +#if DEVELOPMENT || DEBUG + struct clock_calend calend_cp = clock_calend; +#endif + clock_unlock(); splx(s); + + print_all_clock_variables(__func__, NULL,NULL,NULL,NULL, &calend_cp); +} + + +#if DEVELOPMENT || DEBUG + +void print_all_clock_variables_internal(const char* func, struct clock_calend* clock_calend_cp) +{ + clock_sec_t offset_secs; + clock_usec_t offset_microsecs; + clock_sec_t bintime_secs; + clock_usec_t bintime_microsecs; + clock_sec_t bootime_secs; + clock_usec_t bootime_microsecs; + + if (!g_should_log_clock_adjustments) + return; + + bintime2usclock(&clock_calend_cp->offset, &offset_secs, &offset_microsecs); + bintime2usclock(&clock_calend_cp->bintime, &bintime_secs, &bintime_microsecs); + bintime2usclock(&clock_calend_cp->boottime, &bootime_secs, &bootime_microsecs); + + os_log(OS_LOG_DEFAULT, "%s s_scale_ns %llu s_adj_nsx %lld tick_scale_x %llu offset_count %llu\n", + func , clock_calend_cp->s_scale_ns, clock_calend_cp->s_adj_nsx, + clock_calend_cp->tick_scale_x, clock_calend_cp->offset_count); + os_log(OS_LOG_DEFAULT, "%s offset.sec %ld offset.frac %llu offset_secs %lu offset_microsecs %d\n", + func, clock_calend_cp->offset.sec, clock_calend_cp->offset.frac, + (unsigned long)offset_secs, offset_microsecs); + os_log(OS_LOG_DEFAULT, "%s bintime.sec %ld bintime.frac %llu bintime_secs %lu bintime_microsecs %d\n", + func, clock_calend_cp->bintime.sec, clock_calend_cp->bintime.frac, + (unsigned long)bintime_secs, bintime_microsecs); + os_log(OS_LOG_DEFAULT, "%s bootime.sec %ld bootime.frac %llu bootime_secs %lu bootime_microsecs %d\n", + func, clock_calend_cp->boottime.sec, clock_calend_cp->boottime.frac, + (unsigned long)bootime_secs, bootime_microsecs); + + clock_sec_t basesleep_secs; + clock_usec_t basesleep_microsecs; + + 
bintime2usclock(&clock_calend_cp->basesleep, &basesleep_secs, &basesleep_microsecs); + os_log(OS_LOG_DEFAULT, "%s basesleep.sec %ld basesleep.frac %llu basesleep_secs %lu basesleep_microsecs %d\n", + func, clock_calend_cp->basesleep.sec, clock_calend_cp->basesleep.frac, + (unsigned long)basesleep_secs, basesleep_microsecs); + +} + + +void print_all_clock_variables(const char* func, clock_sec_t* pmu_secs, clock_usec_t* pmu_usec, clock_sec_t* sys_secs, clock_usec_t* sys_usec, struct clock_calend* clock_calend_cp) +{ + if (!g_should_log_clock_adjustments) + return; + + struct bintime bt; + clock_sec_t wall_secs; + clock_usec_t wall_microsecs; + uint64_t now; + uint64_t delta; + + if (pmu_secs) { + os_log(OS_LOG_DEFAULT, "%s PMU %lu s %d u \n", func, (unsigned long)*pmu_secs, *pmu_usec); + } + if (sys_secs) { + os_log(OS_LOG_DEFAULT, "%s sys %lu s %d u \n", func, (unsigned long)*sys_secs, *sys_usec); + } + + print_all_clock_variables_internal(func, clock_calend_cp); + + now = mach_absolute_time(); + delta = now - clock_calend_cp->offset_count; + + bt = scale_delta(delta, clock_calend_cp->tick_scale_x, clock_calend_cp->s_scale_ns, clock_calend_cp->s_adj_nsx); + bintime_add(&bt, &clock_calend_cp->bintime); + bintime2usclock(&bt, &wall_secs, &wall_microsecs); + + os_log(OS_LOG_DEFAULT, "%s wall %lu s %d u computed with %llu abs\n", + func, (unsigned long)wall_secs, wall_microsecs, now); } + +#endif /* DEVELOPMENT || DEBUG */ + + /* * clock_initialize_calendar: * * Set the calendar and related clocks - * from the platform clock at boot or - * wake event. + * from the platform clock at boot. * * Also sends host notifications. 
*/ - void clock_initialize_calendar(void) { @@ -889,19 +1058,40 @@ clock_initialize_calendar(void) clock_usec_t utc_offset_microsecs; spl_t s; struct bintime bt; - + struct bintime monotonic_bt; + struct latched_time monotonic_time; + uint64_t monotonic_usec_total; + clock_sec_t sys2, monotonic_sec; + clock_usec_t microsys2, monotonic_usec; + size_t size; + + //Get PMU time with offset and corresponding sys time PEGetUTCTimeOfDay(&secs, µsecs); + clock_get_system_microtime(&sys, µsys); + + /* + * If the platform has a monotonic clock, use kern.monotonicclock_usecs + * to estimate the sleep/wake time, otherwise use the PMU and adjustments + * provided through settimeofday to estimate the sleep time. + * NOTE: the latter case relies that the kernel is the only component + * to set the PMU offset. + */ + size = sizeof(monotonic_time); + if (kernel_sysctlbyname("kern.monotonicclock_usecs", &monotonic_time, &size, NULL, 0) != 0) { + has_monotonic_clock = 0; + os_log(OS_LOG_DEFAULT, "%s system does not have monotonic clock.\n", __func__); + } else { + has_monotonic_clock = 1; + monotonic_usec_total = monotonic_time.monotonic_time_usec; + absolutetime_to_microtime(monotonic_time.mach_time, &sys2, µsys2); + os_log(OS_LOG_DEFAULT, "%s system has monotonic clock.\n", __func__); + } s = splclock(); clock_lock(); commpage_disable_timestamp(); - /* - * Calculate the new calendar epoch based on - * the platform clock and the system clock. - */ - clock_get_system_microtime(&sys, µsys); utc_offset_secs = secs; utc_offset_microsecs = microsecs; @@ -922,11 +1112,15 @@ clock_initialize_calendar(void) * on error) in which that doesn't hold true. Bring the UTC measurements * in-line with the tick counter measurements as a best effort in that case. 
*/ + //FIXME if the current time is prior than 1970 secs will be negative if ((sys > secs) || ((sys == secs) && (microsys > microsecs))) { + os_log(OS_LOG_DEFAULT, "%s WARNING: PMU offset is less then sys PMU %lu s %d u sys %lu s %d u\n", + __func__, (unsigned long) secs, microsecs, (unsigned long)sys, microsys); secs = utc_offset_secs = sys; microsecs = utc_offset_microsecs = microsys; } + // PMU time with offset - sys // This macro stores the subtraction result in utc_offset_secs and utc_offset_microsecs TIME_SUB(utc_offset_secs, sys, utc_offset_microsecs, microsys, USEC_PER_SEC); @@ -952,13 +1146,33 @@ clock_initialize_calendar(void) clock_calend.s_scale_ns = NSEC_PER_SEC; clock_calend.s_adj_nsx = 0; - clock_calend.basesleep = bt; + if (has_monotonic_clock) { + monotonic_sec = monotonic_usec_total / (clock_sec_t)USEC_PER_SEC; + monotonic_usec = monotonic_usec_total % (clock_usec_t)USEC_PER_SEC; + + // PMU time without offset - sys + // This macro stores the subtraction result in monotonic_sec and monotonic_usec + TIME_SUB(monotonic_sec, sys2, monotonic_usec, microsys2, USEC_PER_SEC); + clock2bintime(&monotonic_sec, &monotonic_usec, &monotonic_bt); + + // set the baseleep as the difference between monotonic clock - sys + clock_calend.basesleep = monotonic_bt; + } else { + // set the baseleep as the difference between PMU clock - sys + clock_calend.basesleep = bt; + } commpage_update_mach_continuous_time(mach_absolutetime_asleep); +#if DEVELOPMENT || DEBUG + struct clock_calend clock_calend_cp = clock_calend; +#endif + clock_unlock(); splx(s); + print_all_clock_variables(__func__, &secs, µsecs, &sys, µsys, &clock_calend_cp); + /* * Send host notifications. 
*/ @@ -973,25 +1187,87 @@ clock_initialize_calendar(void) void clock_wakeup_calendar(void) { - clock_sec_t sys; // sleepless time since boot in seconds - clock_sec_t secs; // Current UTC time - clock_usec_t microsys; - clock_usec_t microsecs; + clock_sec_t sys; + clock_sec_t secs; + clock_usec_t microsys; + clock_usec_t microsecs; spl_t s; - struct bintime utc_offset_bt, last_sleep_bt; + struct bintime bt, last_sleep_bt; + clock_sec_t basesleep_s, last_sleep_sec; + clock_usec_t basesleep_us, last_sleep_usec; + struct latched_time monotonic_time; + uint64_t monotonic_usec_total; + size_t size; + clock_sec_t secs_copy; + clock_usec_t microsecs_copy; +#if DEVELOPMENT || DEBUG + clock_sec_t utc_sec; + clock_usec_t utc_usec; + PEGetUTCTimeOfDay(&utc_sec, &utc_usec); +#endif - PEGetUTCTimeOfDay(&secs, µsecs); + /* + * If the platform has the monotonic clock use that to + * compute the sleep time. The monotonic clock does not have an offset + * that can be modified, so nor kernel or userspace can change the time + * of this clock, it can only monotonically increase over time. + * During sleep mach_absolute_time does not tick, + * so the sleep time is the difference betwen the current monotonic time + * less the absolute time and the previous difference stored at wake time. + * + * basesleep = monotonic - sys ---> computed at last wake + * sleep_time = (monotonic - sys) - basesleep + * + * If the platform does not support monotonic time we use the PMU time + * to compute the last sleep. + * The PMU time is the monotonic clock + an offset that can be set + * by kernel. + * + * IMPORTANT: + * We assume that only the kernel is setting the offset of the PMU and that + * it is doing it only througth the settimeofday interface. + * + * basesleep is the different between the PMU time and the mach_absolute_time + * at wake. + * During awake time settimeofday can change the PMU offset by a delta, + * and basesleep is shifted by the same delta applyed to the PMU. 
So the sleep + * time computation becomes: + * + * PMU = monotonic + PMU_offset + * basesleep = PMU - sys ---> computed at last wake + * basesleep += settimeofday_delta + * PMU_offset += settimeofday_delta + * sleep_time = (PMU - sys) - basesleep + */ + if (has_monotonic_clock) { + //Get monotonic time with corresponding sys time + size = sizeof(monotonic_time); + if (kernel_sysctlbyname("kern.monotonicclock_usecs", &monotonic_time, &size, NULL, 0) != 0) { + panic("%s: could not call kern.monotonicclock_usecs", __func__); + } + monotonic_usec_total = monotonic_time.monotonic_time_usec; + absolutetime_to_microtime(monotonic_time.mach_time, &sys, µsys); + + secs = monotonic_usec_total / (clock_sec_t)USEC_PER_SEC; + microsecs = monotonic_usec_total % (clock_usec_t)USEC_PER_SEC; + } else { + //Get PMU time with offset and corresponding sys time + PEGetUTCTimeOfDay(&secs, µsecs); + clock_get_system_microtime(&sys, µsys); + + } s = splclock(); clock_lock(); - + commpage_disable_timestamp(); - /* - * Calculate the new calendar epoch based on - * the platform clock and the system clock. - */ - clock_get_system_microtime(&sys, µsys); + secs_copy = secs; + microsecs_copy = microsecs; + +#if DEVELOPMENT || DEBUG + struct clock_calend clock_calend_cp1 = clock_calend; +#endif /* DEVELOPMENT || DEBUG */ #if DEVELOPMENT || DEBUG last_utc_sec = secs; @@ -1001,23 +1277,26 @@ clock_wakeup_calendar(void) if (secs > max_utc_sec) max_utc_sec = secs; #endif - /* * We normally expect the UTC clock to be always-on and produce * greater readings than the tick counter. There may be corner cases * due to differing clock resolutions (UTC clock is likely lower) and - * errors reading the UTC clock (some implementations return 0 on error) - * in which that doesn't hold true. Bring the UTC measurements in-line - * with the tick counter measurements as a best effort in that case. + * and errors reading the UTC clock (some implementations return 0 + * on error) in which that doesn't hold true. 
Bring the UTC measurements + * in-line with the tick counter measurements as a best effort in that case. */ + //FIXME if the current time is prior than 1970 secs will be negative if ((sys > secs) || ((sys == secs) && (microsys > microsecs))) { + os_log(OS_LOG_DEFAULT, "%s WARNING: %s is less then sys %s %lu s %d u sys %lu s %d u\n", + __func__, (has_monotonic_clock)?"monotonic":"PMU", (has_monotonic_clock)?"monotonic":"PMU", (unsigned long)secs, microsecs, (unsigned long)sys, microsys); secs = sys; microsecs = microsys; } + // PMU or monotonic - sys // This macro stores the subtraction result in secs and microsecs TIME_SUB(secs, sys, microsecs, microsys, USEC_PER_SEC); - clock2bintime(&secs, µsecs, &utc_offset_bt); + clock2bintime(&secs, µsecs, &bt); /* * Safety belt: the UTC clock will likely have a lower resolution than the tick counter. @@ -1027,20 +1306,29 @@ clock_wakeup_calendar(void) * tick counter to be less than the previously recorded value in clock.calend.basesleep. * In that case simply record that we slept for 0 ticks. 
*/ - if ((utc_offset_bt.sec > clock_calend.basesleep.sec) || - ((utc_offset_bt.sec == clock_calend.basesleep.sec) && (utc_offset_bt.frac > clock_calend.basesleep.frac))) { + if ((bt.sec > clock_calend.basesleep.sec) || + ((bt.sec == clock_calend.basesleep.sec) && (bt.frac > clock_calend.basesleep.frac))) { - last_sleep_bt = utc_offset_bt; + //last_sleep is the difference between current PMU or monotonic - abs and last wake PMU or monotonic - abs + last_sleep_bt = bt; bintime_sub(&last_sleep_bt, &clock_calend.basesleep); - clock_calend.basesleep = utc_offset_bt; + //set baseseep to current PMU or monotonic - abs + clock_calend.basesleep = bt; + bintime2usclock(&last_sleep_bt, &last_sleep_sec, &last_sleep_usec); bintime2absolutetime(&last_sleep_bt, &mach_absolutetime_last_sleep); mach_absolutetime_asleep += mach_absolutetime_last_sleep; bintime_add(&clock_calend.offset, &last_sleep_bt); bintime_add(&clock_calend.bintime, &last_sleep_bt); - } else + + } else{ mach_absolutetime_last_sleep = 0; + last_sleep_sec = last_sleep_usec = 0; + bintime2usclock(&clock_calend.basesleep, &basesleep_s, &basesleep_us); + os_log(OS_LOG_DEFAULT, "%s WARNING: basesleep (%lu s %d u) > %s-sys (%lu s %d u) \n", + __func__, (unsigned long) basesleep_s, basesleep_us, (has_monotonic_clock)?"monotonic":"PMU", (unsigned long) secs_copy, microsecs_copy ); + } KERNEL_DEBUG_CONSTANT( MACHDBG_CODE(DBG_MACH_CLOCK,MACH_EPOCH_CHANGE) | DBG_FUNC_NONE, @@ -1053,9 +1341,23 @@ clock_wakeup_calendar(void) commpage_update_mach_continuous_time(mach_absolutetime_asleep); adjust_cont_time_thread_calls(); +#if DEVELOPMENT || DEBUG + struct clock_calend clock_calend_cp = clock_calend; +#endif + clock_unlock(); splx(s); +#if DEVELOPMENT || DEBUG + if (g_should_log_clock_adjustments) { + os_log(OS_LOG_DEFAULT, "PMU was %lu s %d u\n",(unsigned long) utc_sec, utc_usec); + os_log(OS_LOG_DEFAULT, "last sleep was %lu s %d u\n",(unsigned long) last_sleep_sec, last_sleep_usec); + 
print_all_clock_variables("clock_wakeup_calendar:BEFORE", + &secs_copy, µsecs_copy, &sys, µsys, &clock_calend_cp1); + print_all_clock_variables("clock_wakeup_calendar:AFTER", NULL, NULL, NULL, NULL, &clock_calend_cp); + } +#endif /* DEVELOPMENT || DEBUG */ + host_notify_calendar_change(); #if CONFIG_DTRACE @@ -1064,7 +1366,6 @@ clock_wakeup_calendar(void) } - /* * clock_get_boottime_nanotime: * diff --git a/osfmk/kern/debug.c b/osfmk/kern/debug.c index a09bb897f..e330be0d6 100644 --- a/osfmk/kern/debug.c +++ b/osfmk/kern/debug.c @@ -96,6 +96,7 @@ #include #include #include +#include #include #include @@ -177,8 +178,9 @@ int mach_assert = 1; #define KDBG_TRACE_PANIC_FILENAME "/var/log/panic.trace" #else /* - * DEBUG_BUF_SIZE can't grow without updates to SMC and iBoot to store larger panic logs on co-processor systems */ + * EXTENDED_/DEBUG_BUF_SIZE can't grow without updates to SMC and iBoot to store larger panic logs on co-processor systems */ #define DEBUG_BUF_SIZE ((3 * PAGE_SIZE) + offsetof(struct macos_panic_header, mph_data)) +#define EXTENDED_DEBUG_BUF_SIZE 0x0013ff80 #define KDBG_TRACE_PANIC_FILENAME "/var/tmp/panic.trace" #endif @@ -198,7 +200,9 @@ char *debug_buf_ptr = (debug_buf + offsetof(struct macos_panic_header, mph_data) * On co-processor platforms, we lose sizeof(struct macos_panic_header) bytes from the end of * the end of the log because we only support writing (3*PAGESIZE) bytes. 
*/ -const unsigned int debug_buf_size = (DEBUG_BUF_SIZE - offsetof(struct macos_panic_header, mph_data)); +unsigned int debug_buf_size = (DEBUG_BUF_SIZE - offsetof(struct macos_panic_header, mph_data)); + +boolean_t extended_debug_log_enabled = FALSE; #endif /* Debugger state */ @@ -219,13 +223,17 @@ unsigned char *kernel_uuid; */ static boolean_t debugger_is_panic = TRUE; +#if DEVELOPMENT || DEBUG +boolean_t debug_boot_arg_inited = FALSE; +#endif + +SECURITY_READ_ONLY_LATE(unsigned int) debug_boot_arg; char kernel_uuid_string[37]; /* uuid_string_t */ char panic_disk_error_description[512]; size_t panic_disk_error_description_size = sizeof(panic_disk_error_description); extern unsigned int write_trace_on_panic; - int kext_assertions_enable = #if DEBUG || DEVELOPMENT TRUE; @@ -249,16 +257,74 @@ panic_init(void) mach_assert = 1; } -#if !CONFIG_EMBEDDED - uint32_t debug_flags = 0; + /* + * Initialize the value of the debug boot-arg + */ + debug_boot_arg = 0; +#if ((CONFIG_EMBEDDED && MACH_KDP) || defined(__x86_64__)) + if (PE_parse_boot_argn("debug", &debug_boot_arg, sizeof (debug_boot_arg))) { +#if DEVELOPMENT || DEBUG + if (debug_boot_arg & DB_HALT) { + halt_in_debugger=1; + } +#endif - if (PE_i_can_has_debugger(&debug_flags) && !(debug_flags & DB_KERN_DUMP_ON_NMI)) { +#if CONFIG_EMBEDDED + if (debug_boot_arg & DB_NMI) { + panicDebugging = TRUE; + } +#else + panicDebugging = TRUE; +#if KDEBUG_MOJO_TRACE + if (debug_boot_arg & DB_PRT_KDEBUG) { + kdebug_serial = TRUE; + } +#endif +#endif /* CONFIG_EMBEDDED */ + } +#endif /* ((CONFIG_EMBEDDED && MACH_KDP) || defined(__x86_64__)) */ + +#if DEVELOPMENT || DEBUG + debug_boot_arg_inited = TRUE; +#endif + +#if !CONFIG_EMBEDDED + /* + * By default we treat Debugger() the same as calls to panic(), unless + * we have debug boot-args present and the DB_KERN_DUMP_ON_NMI *NOT* set. + * If DB_KERN_DUMP_ON_NMI is *NOT* set, return from Debugger() is supported. 
+ * This is because writing an on-device corefile is a destructive operation. + * + * Return from Debugger() is currently only implemented on x86 + */ + if (PE_i_can_has_debugger(NULL) && !(debug_boot_arg & DB_KERN_DUMP_ON_NMI)) { debugger_is_panic = FALSE; } #endif } +#if defined (__x86_64__) +void +extended_debug_log_init(void) +{ + assert(coprocessor_paniclog_flush); + /* + * Allocate an extended panic log buffer that has space for the panic + * stackshot at the end. Update the debug buf pointers appropriately + * to point at this new buffer. + */ + char *new_debug_buf = kalloc(EXTENDED_DEBUG_BUF_SIZE); + bzero(new_debug_buf, EXTENDED_DEBUG_BUF_SIZE); + + panic_info = (struct macos_panic_header *)new_debug_buf; + debug_buf_ptr = debug_buf_base = (new_debug_buf + offsetof(struct macos_panic_header, mph_data)); + debug_buf_size = (EXTENDED_DEBUG_BUF_SIZE - offsetof(struct macos_panic_header, mph_data)); + + extended_debug_log_enabled = TRUE; +} +#endif /* defined (__x86_64__) */ + void debug_log_init(void) { @@ -272,10 +338,11 @@ debug_log_init(void) debug_buf_ptr = debug_buf_base; debug_buf_size = gPanicSize - sizeof(struct embedded_panic_header); #else + bzero(panic_info, DEBUG_BUF_SIZE); + assert(debug_buf_base != NULL); assert(debug_buf_ptr != NULL); assert(debug_buf_size != 0); - bzero(debug_buf, sizeof(debug_buf)); #endif } @@ -351,7 +418,9 @@ DebuggerSaveState(debugger_op db_op, const char *db_message, const char *db_pani CPUPANICARGS = db_panic_args; CPUPANICCALLER = db_panic_caller; } else if (CPUDEBUGGERCOUNT > 1 && db_panic_str != NULL) { - kprintf("Nested panic detected: %s", db_panic_str); + kprintf("Nested panic detected:"); + if (db_panic_str != NULL) + _doprnt(db_panic_str, db_panic_args, PE_kputc, 0); } CPUDEBUGGERSYNC = db_proceed_on_sync_failure; @@ -692,7 +761,7 @@ debugger_collect_diagnostics(unsigned int exception, unsigned int code, unsigned #endif #if defined(__x86_64__) - kprintf("Debugger called: <%s>\n", debugger_message); + 
kprintf("Debugger called: <%s>\n", debugger_message ? debugger_message : ""); #endif /* * DB_HALT (halt_in_debugger) can be requested on startup, we shouldn't generate @@ -707,7 +776,7 @@ debugger_collect_diagnostics(unsigned int exception, unsigned int code, unsigned } if ((debugger_current_op == DBOP_PANIC) || - (debugger_current_op == DBOP_DEBUGGER && debugger_is_panic)) { + ((debugger_current_op == DBOP_DEBUGGER) && debugger_is_panic)) { /* * Attempt to notify listeners once and only once that we've started * panicking. Only do this for Debugger() calls if we're treating @@ -745,14 +814,17 @@ debugger_collect_diagnostics(unsigned int exception, unsigned int code, unsigned } paniclog_append_noflush("\n"); } +#if defined(__x86_64__) + else if (((debugger_current_op == DBOP_DEBUGGER) && debugger_is_panic)) { + paniclog_append_noflush("Debugger called: <%s>\n", debugger_message ? debugger_message : ""); + } /* * Debugger() is treated like panic() on embedded -- for example we use it for WDT * panics (so we need to write a paniclog). On desktop Debugger() is used in the * conventional sense. */ -#if defined(__x86_64__) - if (debugger_current_op == DBOP_PANIC) + if (debugger_current_op == DBOP_PANIC || ((debugger_current_op == DBOP_DEBUGGER) && debugger_is_panic)) #endif { kdp_callouts(KDP_EVENT_PANICLOG); @@ -774,8 +846,7 @@ debugger_collect_diagnostics(unsigned int exception, unsigned int code, unsigned } #if CONFIG_KDP_INTERACTIVE_DEBUGGING - uint32_t debug_flags = 0; - PE_i_can_has_debugger(&debug_flags); + PE_i_can_has_debugger(NULL); /* * If reboot on panic is enabled and the caller of panic indicated that we should skip @@ -783,7 +854,7 @@ debugger_collect_diagnostics(unsigned int exception, unsigned int code, unsigned * allows us to persist any data that's stored in the panic log. 
*/ if ((debugger_panic_options & DEBUGGER_OPTION_SKIP_LOCAL_COREDUMP) && - (debug_flags & DB_REBOOT_POST_CORE)) { + (debug_boot_arg & DB_REBOOT_POST_CORE)) { kdp_machine_reboot_type(kPEPanicRestartCPU); } @@ -791,12 +862,12 @@ debugger_collect_diagnostics(unsigned int exception, unsigned int code, unsigned * Consider generating a local corefile if the infrastructure is configured * and we haven't disabled on-device coredumps. */ - if (kdp_has_polled_corefile() && !(debug_flags & DB_DISABLE_LOCAL_CORE)) { + if (kdp_has_polled_corefile() && !(debug_boot_arg & DB_DISABLE_LOCAL_CORE)) { int ret = -1; #if defined (__x86_64__) /* On x86 we don't do a coredump on Debugger unless the DB_KERN_DUMP_ON_NMI boot-arg is specified. */ - if (debugger_current_op != DBOP_DEBUGGER || (debug_flags & DB_KERN_DUMP_ON_NMI)) + if (debugger_current_op != DBOP_DEBUGGER || (debug_boot_arg & DB_KERN_DUMP_ON_NMI)) #endif { /* @@ -814,8 +885,9 @@ debugger_collect_diagnostics(unsigned int exception, unsigned int code, unsigned } /* If we wrote a corefile and DB_REBOOT_POST_CORE is set, reboot */ - if (ret == 0 && (debug_flags & DB_REBOOT_POST_CORE)) + if (ret == 0 && (debug_boot_arg & DB_REBOOT_POST_CORE)) { kdp_machine_reboot_type(kPEPanicRestartCPU); + } } /* If KDP is configured, try to trap to the debugger */ diff --git a/osfmk/kern/debug.h b/osfmk/kern/debug.h index 175a0ea61..ecf996701 100644 --- a/osfmk/kern/debug.h +++ b/osfmk/kern/debug.h @@ -266,7 +266,7 @@ boolean_t kern_feature_override(uint32_t fmask); * and be done alongside astris and DumpPanic changes. 
*/ struct embedded_panic_header { - uint32_t eph_magic; /* PANIC_MAGIC if valid */ + uint32_t eph_magic; /* EMBEDDED_PANIC_MAGIC if valid */ uint32_t eph_crc; /* CRC of everything following the ph_crc in the header and the contents */ uint32_t eph_version; /* embedded_panic_header version */ uint64_t eph_panic_flags; /* Flags indicating any state or relevant details */ @@ -287,26 +287,39 @@ struct embedded_panic_header { #define EMBEDDED_PANIC_HEADER_FLAG_NESTED_PANIC 0x40 #define EMBEDDED_PANIC_HEADER_FLAG_BUTTON_RESET_PANIC 0x80 #define EMBEDDED_PANIC_HEADER_FLAG_COPROC_INITIATED_PANIC 0x100 +#define EMBEDDED_PANIC_HEADER_FLAG_COREDUMP_FAILED 0x200 #define EMBEDDED_PANIC_HEADER_CURRENT_VERSION 1 #define EMBEDDED_PANIC_MAGIC 0x46554E4B /* FUNK */ struct macos_panic_header { - uint32_t mph_magic; /* PANIC_MAGIC if valid */ - uint32_t mph_crc; /* CRC of everything following mph_crc in the header and the contents */ - uint32_t mph_version; /* macos_panic_header version */ - uint32_t mph_padding; /* unused */ - uint64_t mph_panic_flags; /* Flags indicating any state or relevant details */ - uint32_t mph_panic_log_offset; /* Offset of the panic log from the beginning of the header */ - uint32_t mph_panic_log_len; /* length of the panic log */ - char mph_data[]; /* panic data -- DO NOT ACCESS THIS FIELD DIRECTLY. 
Use the offsets above relative to the beginning of the header */ + uint32_t mph_magic; /* MACOS_PANIC_MAGIC if valid */ + uint32_t mph_crc; /* CRC of everything following mph_crc in the header and the contents */ + uint32_t mph_version; /* macos_panic_header version */ + uint32_t mph_padding; /* unused */ + uint64_t mph_panic_flags; /* Flags indicating any state or relevant details */ + uint32_t mph_panic_log_offset; /* Offset of the panic log from the beginning of the header */ + uint32_t mph_panic_log_len; /* length of the panic log */ + uint32_t mph_stackshot_offset; /* Offset of the panic stackshot from the beginning of the header */ + uint32_t mph_stackshot_len; /* length of the panic stackshot */ + uint32_t mph_other_log_offset; /* Offset of the other log (any logging subsequent to the stackshot) from the beginning of the header */ + uint32_t mph_other_log_len; /* length of the other log */ + char mph_data[]; /* panic data -- DO NOT ACCESS THIS FIELD DIRECTLY. Use the offsets above relative to the beginning of the header */ } __attribute__((packed)); -#define MACOS_PANIC_HEADER_CURRENT_VERSION 1 +#define MACOS_PANIC_HEADER_CURRENT_VERSION 2 #define MACOS_PANIC_MAGIC 0x44454544 /* DEED */ -#define MACOS_PANIC_HEADER_FLAG_NESTED_PANIC 0x01 -#define MACOS_PANIC_HEADER_FLAG_COPROC_INITIATED_PANIC 0x02 +#define MACOS_PANIC_HEADER_FLAG_NESTED_PANIC 0x01 +#define MACOS_PANIC_HEADER_FLAG_COPROC_INITIATED_PANIC 0x02 +#define MACOS_PANIC_HEADER_FLAG_STACKSHOT_SUCCEEDED 0x04 +#define MACOS_PANIC_HEADER_FLAG_STACKSHOT_DATA_COMPRESSED 0x08 +#define MACOS_PANIC_HEADER_FLAG_STACKSHOT_FAILED_DEBUGGERSYNC 0x10 +#define MACOS_PANIC_HEADER_FLAG_STACKSHOT_FAILED_ERROR 0x20 +#define MACOS_PANIC_HEADER_FLAG_STACKSHOT_FAILED_INCOMPLETE 0x40 +#define MACOS_PANIC_HEADER_FLAG_STACKSHOT_FAILED_NESTED 0x80 +#define MACOS_PANIC_HEADER_FLAG_COREDUMP_COMPLETE 0x100 +#define MACOS_PANIC_HEADER_FLAG_COREDUMP_FAILED 0x200 #endif /* __APPLE_API_UNSTABLE */ #endif /* __APPLE_API_PRIVATE */ @@ 
-434,6 +447,7 @@ __END_DECLS boolean_t oslog_is_safe(void); boolean_t debug_mode_active(void); boolean_t stackshot_active(void); +void panic_stackshot_reset_state(void); /* * @function stack_snapshot_from_kernel @@ -461,6 +475,7 @@ stack_snapshot_from_kernel(int pid, void *buf, uint32_t size, uint32_t flags, #if !CONFIG_EMBEDDED extern char debug_buf[]; extern boolean_t coprocessor_paniclog_flush; +extern boolean_t extended_debug_log_enabled; /* set to TRUE by extended_debug_log_init() */ #endif /* !CONFIG_EMBEDDED */ extern char *debug_buf_base; @@ -471,8 +486,9 @@ extern size_t panic_disk_error_description_size; extern unsigned char *kernel_uuid; extern unsigned int debug_boot_arg; - -#ifdef XNU_KERNEL_PRIVATE +#if DEVELOPMENT || DEBUG +extern boolean_t debug_boot_arg_inited; +#endif #ifdef __cplusplus extern "C" { @@ -498,12 +514,7 @@ extern unsigned int kdebug_serial; extern const char *debugger_panic_str; extern char *debug_buf_ptr; - -#if CONFIG_EMBEDDED extern unsigned int debug_buf_size; -#else -extern const unsigned int debug_buf_size; -#endif extern void debug_log_init(void); extern void debug_putc(char); @@ -511,6 +522,8 @@ extern void debug_putc(char); extern void panic_init(void); #if defined (__x86_64__) +extern void extended_debug_log_init(void); + int packA(char *inbuf, uint32_t length, uint32_t buflen); void unpackA(char *inbuf, uint32_t length); @@ -536,7 +549,6 @@ void panic_display_ztrace(void); #if CONFIG_ECC_LOGGING void panic_display_ecc_errors(void); #endif /* CONFIG_ECC_LOGGING */ -#endif /* MACH_KERNEL_PRIVATE */ /* * @var not_in_kdp diff --git a/osfmk/kern/host.c b/osfmk/kern/host.c index e9cd7d872..47bab64b1 100644 --- a/osfmk/kern/host.c +++ b/osfmk/kern/host.c @@ -311,6 +311,8 @@ host_info(host_t host, host_flavor_t flavor, host_info_t info, mach_msg_type_num } } +kern_return_t host_statistics(host_t host, host_flavor_t flavor, host_info_t info, mach_msg_type_number_t * count); + kern_return_t host_statistics(host_t host, host_flavor_t flavor, host_info_t info, 
mach_msg_type_number_t * count) { @@ -525,6 +527,219 @@ host_statistics(host_t host, host_flavor_t flavor, host_info_t info, mach_msg_ty extern uint32_t c_segment_pages_compressed; +#define HOST_STATISTICS_TIME_WINDOW 1 /* seconds */ +#define HOST_STATISTICS_MAX_REQUESTS 10 /* maximum number of requests per window */ +#define HOST_STATISTICS_MIN_REQUESTS 2 /* minimum number of requests per window */ + +uint64_t host_statistics_time_window; + +static lck_mtx_t host_statistics_lck; +static lck_grp_t* host_statistics_lck_grp; + +#define HOST_VM_INFO64_REV0 0 +#define HOST_VM_INFO64_REV1 1 +#define HOST_EXTMOD_INFO64_REV0 2 +#define HOST_LOAD_INFO_REV0 3 +#define HOST_VM_INFO_REV0 4 +#define HOST_VM_INFO_REV1 5 +#define HOST_VM_INFO_REV2 6 +#define HOST_CPU_LOAD_INFO_REV0 7 +#define HOST_EXPIRED_TASK_INFO_REV0 8 +#define HOST_EXPIRED_TASK_INFO_REV1 9 +#define NUM_HOST_INFO_DATA_TYPES 10 + +static vm_statistics64_data_t host_vm_info64_rev0 = {}; +static vm_statistics64_data_t host_vm_info64_rev1 = {}; +static vm_extmod_statistics_data_t host_extmod_info64 = {}; +static host_load_info_data_t host_load_info = {}; +static vm_statistics_data_t host_vm_info_rev0 = {}; +static vm_statistics_data_t host_vm_info_rev1 = {}; +static vm_statistics_data_t host_vm_info_rev2 = {}; +static host_cpu_load_info_data_t host_cpu_load_info = {}; +static task_power_info_data_t host_expired_task_info = {}; +static task_power_info_v2_data_t host_expired_task_info2 = {}; + +struct host_stats_cache { + uint64_t last_access; + uint64_t current_requests; + uint64_t max_requests; + uintptr_t data; + mach_msg_type_number_t count; //NOTE count is in sizeof(integer_t) +}; + +static struct host_stats_cache g_host_stats_cache[NUM_HOST_INFO_DATA_TYPES] = { + [HOST_VM_INFO64_REV0] = { .last_access = 0, .current_requests = 0, .max_requests = 0, .data = (uintptr_t)&host_vm_info64_rev0, .count = HOST_VM_INFO64_REV0_COUNT }, + [HOST_VM_INFO64_REV1] = { .last_access = 0, .current_requests = 0, .max_requests = 
0, .data = (uintptr_t)&host_vm_info64_rev1, .count = HOST_VM_INFO64_REV1_COUNT }, + [HOST_EXTMOD_INFO64_REV0] = { .last_access = 0, .current_requests = 0, .max_requests = 0, .data = (uintptr_t)&host_extmod_info64, .count = HOST_EXTMOD_INFO64_COUNT }, + [HOST_LOAD_INFO_REV0] = { .last_access = 0, .current_requests = 0, .max_requests = 0, .data = (uintptr_t)&host_load_info, .count = HOST_LOAD_INFO_COUNT }, + [HOST_VM_INFO_REV0] = { .last_access = 0, .current_requests = 0, .max_requests = 0, .data = (uintptr_t)&host_vm_info_rev0, .count = HOST_VM_INFO_REV0_COUNT }, + [HOST_VM_INFO_REV1] = { .last_access = 0, .current_requests = 0, .max_requests = 0, .data = (uintptr_t)&host_vm_info_rev1, .count = HOST_VM_INFO_REV1_COUNT }, + [HOST_VM_INFO_REV2] = { .last_access = 0, .current_requests = 0, .max_requests = 0, .data = (uintptr_t)&host_vm_info_rev2, .count = HOST_VM_INFO_REV2_COUNT }, + [HOST_CPU_LOAD_INFO_REV0] = { .last_access = 0, .current_requests = 0, .max_requests = 0, .data = (uintptr_t)&host_cpu_load_info, .count = HOST_CPU_LOAD_INFO_COUNT }, + [HOST_EXPIRED_TASK_INFO_REV0] = { .last_access = 0, .current_requests = 0, .max_requests = 0, .data = (uintptr_t)&host_expired_task_info, .count = TASK_POWER_INFO_COUNT }, + [HOST_EXPIRED_TASK_INFO_REV1] = { .last_access = 0, .current_requests = 0, .max_requests = 0, .data = (uintptr_t)&host_expired_task_info2, .count = TASK_POWER_INFO_V2_COUNT}, +}; + + +void +host_statistics_init(void) +{ + host_statistics_lck_grp = lck_grp_alloc_init("host_statistics", LCK_GRP_ATTR_NULL); + lck_mtx_init(&host_statistics_lck, host_statistics_lck_grp, LCK_ATTR_NULL); + nanoseconds_to_absolutetime((HOST_STATISTICS_TIME_WINDOW * NSEC_PER_SEC), &host_statistics_time_window); +} + +static void +cache_host_statistics(int index, host_info64_t info) +{ + if (index < 0 || index >= NUM_HOST_INFO_DATA_TYPES) + return; + + task_t task = current_task(); + if (task->t_flags & TF_PLATFORM) + return; + + memcpy((void *)g_host_stats_cache[index].data, 
info, g_host_stats_cache[index].count * sizeof(integer_t)); + return; +} + +static void +get_cached_info(int index, host_info64_t info, mach_msg_type_number_t* count) +{ + if (index < 0 || index >= NUM_HOST_INFO_DATA_TYPES) { + *count = 0; + return; + } + + *count = g_host_stats_cache[index].count; + memcpy(info, (void *)g_host_stats_cache[index].data, g_host_stats_cache[index].count * sizeof(integer_t)); +} + +static int +get_host_info_data_index(bool is_stat64, host_flavor_t flavor, mach_msg_type_number_t* count, kern_return_t* ret) +{ + switch (flavor) { + + case HOST_VM_INFO64: + if (!is_stat64){ + *ret = KERN_INVALID_ARGUMENT; + return -1; + } + if (*count < HOST_VM_INFO64_REV0_COUNT) { + *ret = KERN_FAILURE; + return -1; + } + if (*count >= HOST_VM_INFO64_REV1_COUNT) { + return HOST_VM_INFO64_REV1; + } + return HOST_VM_INFO64_REV0; + + case HOST_EXTMOD_INFO64: + if (!is_stat64){ + *ret = KERN_INVALID_ARGUMENT; + return -1; + } + if (*count < HOST_EXTMOD_INFO64_COUNT) { + *ret = KERN_FAILURE; + return -1; + } + return HOST_EXTMOD_INFO64_REV0; + + case HOST_LOAD_INFO: + if (*count < HOST_LOAD_INFO_COUNT) { + *ret = KERN_FAILURE; + return -1; + } + return HOST_LOAD_INFO_REV0; + + case HOST_VM_INFO: + if (*count < HOST_VM_INFO_REV0_COUNT) { + *ret = KERN_FAILURE; + return -1; + } + if (*count >= HOST_VM_INFO_REV2_COUNT) { + return HOST_VM_INFO_REV2; + } + if (*count >= HOST_VM_INFO_REV1_COUNT) { + return HOST_VM_INFO_REV1; + } + return HOST_VM_INFO_REV0; + + case HOST_CPU_LOAD_INFO: + if (*count < HOST_CPU_LOAD_INFO_COUNT) { + *ret = KERN_FAILURE; + return -1; + } + return HOST_CPU_LOAD_INFO_REV0; + + case HOST_EXPIRED_TASK_INFO: + if (*count < TASK_POWER_INFO_COUNT){ + *ret = KERN_FAILURE; + return -1; + } + if (*count >= TASK_POWER_INFO_V2_COUNT){ + return HOST_EXPIRED_TASK_INFO_REV1; + } + return HOST_EXPIRED_TASK_INFO_REV0; + + default: + *ret = KERN_INVALID_ARGUMENT; + return -1; + + } + +} + +static bool +rate_limit_host_statistics(bool is_stat64, 
host_flavor_t flavor, host_info64_t info, mach_msg_type_number_t* count, kern_return_t* ret, int *pindex) +{ + task_t task = current_task(); + + assert(task != kernel_task); + + *ret = KERN_SUCCESS; + + /* Access control only for third party applications */ + if (task->t_flags & TF_PLATFORM) { + return FALSE; + } + + /* Rate limit to HOST_STATISTICS_MAX_REQUESTS queries for each HOST_STATISTICS_TIME_WINDOW window of time */ + bool rate_limited = FALSE; + bool set_last_access = TRUE; + + /* there is a cache for every flavor */ + int index = get_host_info_data_index(is_stat64, flavor, count, ret); + if (index == -1) + goto out; + + *pindex = index; + lck_mtx_lock(&host_statistics_lck); + if (g_host_stats_cache[index].last_access > mach_continuous_time() - host_statistics_time_window) { + set_last_access = FALSE; + if (g_host_stats_cache[index].current_requests++ >= g_host_stats_cache[index].max_requests) { + rate_limited = TRUE; + get_cached_info(index, info, count); + } + } + if (set_last_access) { + g_host_stats_cache[index].current_requests = 1; + /* + * Select a random number of requests (between HOST_STATISTICS_MIN_REQUESTS and HOST_STATISTICS_MAX_REQUESTS, inclusive) + * that are allowed to query host_statistics during this window. + * This way it is not possible to infer, by watching when a cached copy changes, whether host_statistics was called in + * the previous window. 
+ */ + g_host_stats_cache[index].max_requests = (mach_absolute_time() % (HOST_STATISTICS_MAX_REQUESTS - HOST_STATISTICS_MIN_REQUESTS + 1)) + HOST_STATISTICS_MIN_REQUESTS; + g_host_stats_cache[index].last_access = mach_continuous_time(); + } + lck_mtx_unlock(&host_statistics_lck); +out: + return rate_limited; +} + +kern_return_t host_statistics64(host_t host, host_flavor_t flavor, host_info_t info, mach_msg_type_number_t * count); + kern_return_t host_statistics64(host_t host, host_flavor_t flavor, host_info64_t info, mach_msg_type_number_t * count) { @@ -661,6 +876,52 @@ host_statistics64(host_t host, host_flavor_t flavor, host_info64_t info, mach_ms } } +kern_return_t +host_statistics64_from_user(host_t host, host_flavor_t flavor, host_info64_t info, mach_msg_type_number_t * count) +{ + kern_return_t ret = KERN_SUCCESS; + int index = -1; /* rate_limit_host_statistics() only writes this after it has picked a cache slot; -1 makes cache_host_statistics() a no-op */ + + if (host == HOST_NULL) + return (KERN_INVALID_HOST); + + if (rate_limit_host_statistics(TRUE, flavor, info, count, &ret, &index)) /* over quota: info/count were filled from the cached copy */ + return ret; + + if (ret != KERN_SUCCESS) /* flavor/count were rejected while selecting the cache slot */ + return ret; + + ret = host_statistics64(host, flavor, info, count); + + if (ret == KERN_SUCCESS) /* remember the fresh answer for later rate-limited callers */ + cache_host_statistics(index, info); + + return ret; +} + +kern_return_t +host_statistics_from_user(host_t host, host_flavor_t flavor, host_info64_t info, mach_msg_type_number_t * count) +{ + kern_return_t ret = KERN_SUCCESS; + int index = -1; /* rate_limit_host_statistics() only writes this after it has picked a cache slot; -1 makes cache_host_statistics() a no-op */ + + if (host == HOST_NULL) + return (KERN_INVALID_HOST); + + if (rate_limit_host_statistics(FALSE, flavor, info, count, &ret, &index)) /* over quota: info/count were filled from the cached copy */ + return ret; + + if (ret != KERN_SUCCESS) /* flavor/count were rejected while selecting the cache slot */ + return ret; + + ret = host_statistics(host, flavor, info, count); + + if (ret == KERN_SUCCESS) /* remember the fresh answer for later rate-limited callers */ + cache_host_statistics(index, info); + + return ret; +} + /* * Get host statistics that require privilege. * None for now, just call the un-privileged version. 
diff --git a/osfmk/kern/kern_stackshot.c b/osfmk/kern/kern_stackshot.c index 6ac0d3665..be6088138 100644 --- a/osfmk/kern/kern_stackshot.c +++ b/osfmk/kern/kern_stackshot.c @@ -746,6 +746,12 @@ kdp_snapshot_preflight(int pid, void * tracebuf, uint32_t tracebuf_size, uint32_ stack_snapshot_bytes_traced = 0; } +void +panic_stackshot_reset_state() +{ + stackshot_kcdata_p = NULL; +} + boolean_t stackshot_active() { diff --git a/osfmk/kern/startup.c b/osfmk/kern/startup.c index c76524bac..a1a9c563c 100644 --- a/osfmk/kern/startup.c +++ b/osfmk/kern/startup.c @@ -122,6 +122,7 @@ #include #include #include +#include #if CONFIG_ATM @@ -380,6 +381,9 @@ kernel_bootstrap(void) /* initialize the corpse config based on boot-args */ corpses_init(); + /* initialize host_statistics */ + host_statistics_init(); + /* * Create a kernel thread to execute the kernel bootstrap. */ diff --git a/osfmk/kern/telemetry.c b/osfmk/kern/telemetry.c index 68f2f7f9a..120885eac 100644 --- a/osfmk/kern/telemetry.c +++ b/osfmk/kern/telemetry.c @@ -50,9 +50,8 @@ #include #include -#include -#include #include +#include #include #include @@ -360,9 +359,7 @@ void telemetry_take_sample(thread_t thread, uint8_t microsnapshot_flags, struct { task_t task; void *p; - struct kperf_context ctx; - struct callstack cs; - uint32_t btcount, bti; + uint32_t btcount = 0, bti; struct micro_snapshot *msnap; struct task_snapshot *tsnap; struct thread_snapshot *thsnap; @@ -402,18 +399,17 @@ void telemetry_take_sample(thread_t thread, uint8_t microsnapshot_flags, struct p = get_bsdtask_info(task); - ctx.cur_thread = thread; - ctx.cur_pid = proc_pid(p); - /* * Gather up the data we'll need for this sample. The sample is written into the kernel * buffer with the global telemetry lock held -- so we must do our (possibly faulting) * copies from userland here, before taking the lock. 
*/ - cs.nframes = MAX_CALLSTACK_FRAMES; - kperf_ucallstack_sample(&cs, &ctx); - if (!(cs.flags & CALLSTACK_VALID)) + uintptr_t frames[MAX_CALLSTACK_FRAMES] = {}; + bool user64; + int backtrace_error = backtrace_user(frames, MAX_CALLSTACK_FRAMES, &btcount, &user64); + if (backtrace_error) { return; + } /* * Find the actual [slid] address of the shared cache's UUID, and copy it in from userland. @@ -731,8 +727,6 @@ void telemetry_take_sample(thread_t thread, uint8_t microsnapshot_flags, struct framesize = 4; } - btcount = cs.nframes; - /* * If we can't fit this entire stacktrace then cancel this record, wrap to the beginning, * and start again there so that we always store a full record. @@ -749,9 +743,9 @@ void telemetry_take_sample(thread_t thread, uint8_t microsnapshot_flags, struct for (bti=0; bti < btcount; bti++, current_buffer->current_position += framesize) { if (framesize == 8) { - *(uint64_t *)(uintptr_t)(current_buffer->buffer + current_buffer->current_position) = cs.frames[bti]; + *(uint64_t *)(uintptr_t)(current_buffer->buffer + current_buffer->current_position) = frames[bti]; } else { - *(uint32_t *)(uintptr_t)(current_buffer->buffer + current_buffer->current_position) = (uint32_t)cs.frames[bti]; + *(uint32_t *)(uintptr_t)(current_buffer->buffer + current_buffer->current_position) = (uint32_t)frames[bti]; } } diff --git a/osfmk/kern/thread_call.c b/osfmk/kern/thread_call.c index e403f17dc..d43248ee5 100644 --- a/osfmk/kern/thread_call.c +++ b/osfmk/kern/thread_call.c @@ -1221,9 +1221,10 @@ thread_call_wake( THREAD_AWAKENED, WAITQ_ALL_PRIORITIES) == KERN_SUCCESS) { group->idle_count--; group->active_count++; - if (group->idle_count == 0) { - timer_call_cancel(&group->dealloc_timer); - group->flags &= ~TCG_DEALLOC_ACTIVE; + if (group->idle_count == 0 && (group->flags & TCG_DEALLOC_ACTIVE) == TCG_DEALLOC_ACTIVE) { + if (timer_call_cancel(&group->dealloc_timer) == TRUE) { + group->flags &= ~TCG_DEALLOC_ACTIVE; + } } } else { if (!thread_call_daemon_awake && 
thread_call_group_should_add_thread(group)) { @@ -1500,19 +1501,18 @@ thread_call_thread( if (group->idle_count == 1) { group->idle_timestamp = mach_absolute_time(); - } + } if (((group->flags & TCG_DEALLOC_ACTIVE) == 0) && - ((group->active_count + group->idle_count) > group->target_thread_count)) { - group->flags |= TCG_DEALLOC_ACTIVE; + ((group->active_count + group->idle_count) > group->target_thread_count)) { thread_call_start_deallocate_timer(group); - } + } /* Wait for more work (or termination) */ wres = waitq_assert_wait64(&group->idle_waitq, NO_EVENT64, THREAD_INTERRUPTIBLE, 0); if (wres != THREAD_WAITING) { panic("kcall worker unable to assert wait?"); - } + } enable_ints_and_unlock(s); @@ -1600,21 +1600,20 @@ thread_call_daemon( * is idle the whole time. */ static void -thread_call_start_deallocate_timer( - thread_call_group_t group) +thread_call_start_deallocate_timer(thread_call_group_t group) { - uint64_t deadline; - boolean_t onqueue; + __assert_only boolean_t already_enqueued; assert(group->idle_count > 0); + assert((group->flags & TCG_DEALLOC_ACTIVE) == 0); - group->flags |= TCG_DEALLOC_ACTIVE; - deadline = group->idle_timestamp + thread_call_dealloc_interval_abs; - onqueue = timer_call_enter(&group->dealloc_timer, deadline, 0); + group->flags |= TCG_DEALLOC_ACTIVE; - if (onqueue) { - panic("Deallocate timer already active?"); - } + uint64_t deadline = group->idle_timestamp + thread_call_dealloc_interval_abs; + + already_enqueued = timer_call_enter(&group->dealloc_timer, deadline, 0); + + assert(already_enqueued == FALSE); } /* non-static so dtrace can find it rdar://problem/31156135&31379348 */ @@ -1763,10 +1762,13 @@ thread_call_dealloc_timer( uint64_t now; kern_return_t res; boolean_t terminated = FALSE; - + thread_call_lock_spin(); + assert((group->flags & TCG_DEALLOC_ACTIVE) == TCG_DEALLOC_ACTIVE); + now = mach_absolute_time(); + if (group->idle_count > 0) { if (now > group->idle_timestamp + thread_call_dealloc_interval_abs) { terminated = 
TRUE; @@ -1777,9 +1779,10 @@ thread_call_dealloc_timer( panic("Unable to wake up idle thread for termination?"); } } - } + group->flags &= ~TCG_DEALLOC_ACTIVE; + /* * If we still have an excess of threads, schedule another * invocation of this function. @@ -1794,8 +1797,6 @@ thread_call_dealloc_timer( } thread_call_start_deallocate_timer(group); - } else { - group->flags &= ~TCG_DEALLOC_ACTIVE; } thread_call_unlock(); diff --git a/osfmk/kperf/callstack.h b/osfmk/kperf/callstack.h index aa2ec165e..76d442ced 100644 --- a/osfmk/kperf/callstack.h +++ b/osfmk/kperf/callstack.h @@ -49,6 +49,7 @@ struct callstack { uint32_t flags; uint32_t nframes; + /* WARNING this can be uintptr_t instead if CALLSTACK_KERNEL_WORDS is set */ uint64_t frames[MAX_CALLSTACK_FRAMES]; }; diff --git a/osfmk/kperf/kperf.c b/osfmk/kperf/kperf.c index 76d75d78b..19f7d8704 100644 --- a/osfmk/kperf/kperf.c +++ b/osfmk/kperf/kperf.c @@ -169,6 +169,7 @@ kperf_kernel_configure(const char *config) do { uint32_t action_samplers; + uint64_t timer_period_ns; uint64_t timer_period; pairs += 1; @@ -190,11 +191,12 @@ kperf_kernel_configure(const char *config) } config++; - timer_period = strtouq(config, &end, 0); + timer_period_ns = strtouq(config, &end, 0); if (config == end) { kprintf("kperf: unable to parse '%s' as timer period\n", config); goto out; } + nanoseconds_to_absolutetime(timer_period_ns, &timer_period); config = end; kperf_timer_set_period(pairs - 1, timer_period); diff --git a/osfmk/mach/host_info.h b/osfmk/mach/host_info.h index d5f8c6b1f..9339fad37 100644 --- a/osfmk/mach/host_info.h +++ b/osfmk/mach/host_info.h @@ -196,6 +196,9 @@ typedef struct host_priority_info *host_priority_info_t; #define HOST_EXTMOD_INFO64 5 /* External modification stats */ #define HOST_EXPIRED_TASK_INFO 6 /* Statistics for expired tasks */ +#ifdef XNU_KERNEL_PRIVATE +void host_statistics_init(void); +#endif struct host_load_info { integer_t avenrun[3]; /* scaled by LOAD_SCALE */ diff --git 
a/osfmk/mach/mach_host.defs b/osfmk/mach/mach_host.defs index 637109763..0f1e2c46d 100644 --- a/osfmk/mach/mach_host.defs +++ b/osfmk/mach/mach_host.defs @@ -214,7 +214,12 @@ routine mach_memory_object_memory_entry_64( /* * Return statistics from this host. */ -routine host_statistics( +routine +#ifdef KERNEL_SERVER +host_statistics_from_user( +#else +host_statistics( +#endif host_priv : host_t; flavor : host_flavor_t; out host_info_out : host_info_t, CountInOut); @@ -232,7 +237,12 @@ routine host_lockgroup_info( /* * Return 64-bit statistics from this host. */ -routine host_statistics64( +routine +#ifdef KERNEL_SERVER +host_statistics64_from_user( +#else +host_statistics64( +#endif host_priv : host_t; flavor : host_flavor_t; out host_info64_out : host_info64_t, CountInOut); diff --git a/osfmk/vm/vm_compressor_pager.c b/osfmk/vm/vm_compressor_pager.c index c5fe750fc..ae0195c86 100644 --- a/osfmk/vm/vm_compressor_pager.c +++ b/osfmk/vm/vm_compressor_pager.c @@ -645,6 +645,18 @@ compressor_pager_slot_lookup( compressor_pager_lock(pager); if ((chunk = pager->cpgr_slots.cpgr_islots[chunk_idx]) == NULL) { + + /* + * On some platforms, the memory stores from + * the bzero(t_chunk) above might not have been + * made visible and another thread might see + * the contents of this new chunk before it's + * been fully zero-filled. + * This memory barrier should take care of this + * according to the platform requirements. 
+ */ + __c11_atomic_thread_fence(memory_order_release); + chunk = pager->cpgr_slots.cpgr_islots[chunk_idx] = t_chunk; t_chunk = NULL; } diff --git a/pexpert/arm/pe_init.c b/pexpert/arm/pe_init.c index 8accd1743..70d5e54f8 100644 --- a/pexpert/arm/pe_init.c +++ b/pexpert/arm/pe_init.c @@ -460,6 +460,9 @@ uint32_t PE_i_can_has_debugger(uint32_t *debug_flags) { if (debug_flags) { +#if DEVELOPMENT || DEBUG + assert(debug_boot_arg_inited); +#endif if (debug_enabled) *debug_flags = debug_boot_arg; else diff --git a/pexpert/i386/pe_init.c b/pexpert/i386/pe_init.c index 2a82b7f36..49f9caecf 100644 --- a/pexpert/i386/pe_init.c +++ b/pexpert/i386/pe_init.c @@ -49,7 +49,7 @@ /* extern references */ extern void pe_identify_machine(void * args); - +extern int kdb_printf(const char *format, ...) __printflike(1,2); /* private globals */ PE_state_t PE_state; @@ -344,6 +344,12 @@ PE_sync_panic_buffers(void) uint32_t PE_i_can_has_debugger(uint32_t *debug_flags) { +#if DEVELOPMENT || DEBUG + if (debug_flags) { + assert(debug_boot_arg_inited); + } +#endif + #if CONFIG_CSR if (csr_check(CSR_ALLOW_KERNEL_DEBUGGER) != 0) { if (debug_flags) @@ -363,7 +369,7 @@ PE_get_offset_into_panic_region(char *location) assert(panic_info != NULL); assert(location > (char *) panic_info); - return (uint32_t) (location - debug_buf); + return (uint32_t) (location - (char *) panic_info); } void @@ -383,6 +389,9 @@ PE_init_panicheader() * * NOTE: The purpose of this function is NOT to detect/correct corruption in the panic region, * it is to update the panic header to make it consistent when we nest panics. + * + * We try to avoid nested panics/asserts on x86 because they are difficult to debug, so log any + * inconsistencies we find. 
*/ void PE_update_panicheader_nestedpanic() @@ -396,7 +405,27 @@ PE_update_panicheader_nestedpanic() panic_info->mph_panic_flags |= MACOS_PANIC_HEADER_FLAG_NESTED_PANIC; - /* macOS panic logs include nested panic data, so don't touch the panic log length here */ + /* Usually indicative of corruption in the panic region */ + if(!(((panic_info->mph_stackshot_offset == 0) && (panic_info->mph_stackshot_len == 0)) || + ((panic_info->mph_stackshot_offset != 0) && (panic_info->mph_stackshot_len != 0)))) { + kdb_printf("panic_info contains invalid stackshot metadata: mph_stackshot_offset 0x%x mph_stackshot_len 0x%x\n", + panic_info->mph_stackshot_offset, panic_info->mph_stackshot_len); + } + + /* + * macOS panic logs contain nested panic data, if we've already closed the panic log, + * begin the other log. + */ + if ((panic_info->mph_panic_log_len != 0) && (panic_info->mph_other_log_offset == 0)) { + panic_info->mph_other_log_offset = PE_get_offset_into_panic_region(debug_buf_ptr); + + /* Usually indicative of corruption in the panic region */ + if (panic_info->mph_other_log_len != 0) { + kdb_printf("panic_info contains invalid other log metadata (zero offset but non-zero length), length was 0x%x, zeroing value\n", + panic_info->mph_other_log_len); + panic_info->mph_other_log_len = 0; + } + } return; } diff --git a/san/kasan-arm64.c b/san/kasan-arm64.c index 49137763c..3fbb9827b 100644 --- a/san/kasan-arm64.c +++ b/san/kasan-arm64.c @@ -104,10 +104,19 @@ alloc_zero_page(void) return mem; } +static void +align_to_page(vm_offset_t *addrp, vm_offset_t *sizep) +{ + vm_offset_t addr_aligned = vm_map_trunc_page(*addrp, ARM_PGMASK); + *sizep = vm_map_round_page(*sizep + (*addrp - addr_aligned), ARM_PGMASK); + *addrp = addr_aligned; +} + static void kasan_map_shadow_internal(vm_offset_t address, vm_size_t size, bool is_zero, bool back_page) { - size = vm_map_round_page(size, ARM_PGMASK); + align_to_page(&address, &size); + vm_size_t j; uint64_t *pte; @@ -189,7 +198,8 @@ 
kasan_map_shadow(vm_offset_t address, vm_size_t size, bool is_zero) static void kasan_map_shadow_early(vm_offset_t address, vm_size_t size, bool is_zero) { - size = vm_map_round_page(size, ARM_PGMASK); + align_to_page(&address, &size); + vm_size_t j; uint64_t *pte; diff --git a/tools/lldbmacros/core/kernelcore.py b/tools/lldbmacros/core/kernelcore.py index da145a437..580887584 100755 --- a/tools/lldbmacros/core/kernelcore.py +++ b/tools/lldbmacros/core/kernelcore.py @@ -46,30 +46,13 @@ def IterateLinkedList(element, field_name): elt = elt.__getattr__(field_name) #end of while loop -def IterateSListEntry(element, element_type, field_name, slist_prefix=''): - """ iterate over a list as defined with SLIST_HEAD in bsd/sys/queue.h - params: - element - value : Value object for slh_first - element_type - str : Type of the next element - field_name - str : Name of the field in next element's structure - returns: - A generator does not return. It is used for iterating - value : an object thats of type (element_type) head->sle_next. Always a pointer object - """ - elt = element.__getattr__(slist_prefix + 'slh_first') - if type(element_type) == str: - element_type = gettype(element_type) - while unsigned(elt) != 0: - yield elt - next_el = elt.__getattr__(field_name).__getattr__(slist_prefix + 'sle_next') - elt = cast(next_el, element_type) - def IterateListEntry(element, element_type, field_name, list_prefix=''): """ iterate over a list as defined with LIST_HEAD in bsd/sys/queue.h params: element - value : Value object for lh_first element_type - str : Type of the next element field_name - str : Name of the field in next element's structure + list_prefix - str : use 's' here to iterate SLIST_HEAD instead returns: A generator does not return. It is used for iterating value : an object thats of type (element_type) head->le_next. 
Always a pointer object @@ -178,6 +161,67 @@ def unpack_ptr_and_recast(v): cur_elt = unpack_ptr_and_recast(elt.GetChildMemberWithName(element_field_name).GetChildMemberWithName('next')) +def IterateRBTreeEntry(element, element_type, field_name): + """ iterate over a rbtree as defined with RB_HEAD in libkern/tree.h + element - value : Value object for rbh_root + element_type - str : Type of the link element + field_name - str : Name of the field in link element's structure + returns: + A generator does not return. It is used for iterating + value : an object thats of type (element_type) head->sle_next. Always a pointer object + """ + elt = element.__getattr__('rbh_root') + if type(element_type) == str: + element_type = gettype(element_type) + + # Walk to find min + parent = elt + while unsigned(elt) != 0: + parent = elt + elt = cast(elt.__getattr__(field_name).__getattr__('rbe_left'), element_type) + elt = parent + + # Now elt is min + while unsigned(elt) != 0: + yield elt + # implementation cribbed from RB_NEXT in libkern/tree.h + right = cast(elt.__getattr__(field_name).__getattr__('rbe_right'), element_type) + if unsigned(right) != 0: + elt = right + left = cast(elt.__getattr__(field_name).__getattr__('rbe_left'), element_type) + while unsigned(left) != 0: + elt = left + left = cast(elt.__getattr__(field_name).__getattr__('rbe_left'), element_type) + else: + + # avoid using GetValueFromAddress + addr = elt.__getattr__(field_name).__getattr__('rbe_parent')&~1 + parent = value(elt.GetSBValue().CreateValueFromExpression(None,'(void *)'+str(addr))) + parent = cast(parent, element_type) + + if unsigned(parent) != 0: + left = cast(parent.__getattr__(field_name).__getattr__('rbe_left'), element_type) + if (unsigned(parent) != 0) and (unsigned(elt) == unsigned(left)): + elt = parent + else: + if unsigned(parent) != 0: + right = cast(parent.__getattr__(field_name).__getattr__('rbe_right'), element_type) + while unsigned(parent) != 0 and (unsigned(elt) == unsigned(right)): 
+ elt = parent + + # avoid using GetValueFromAddress + addr = elt.__getattr__(field_name).__getattr__('rbe_parent')&~1 + parent = value(elt.GetSBValue().CreateValueFromExpression(None,'(void *)'+str(addr))) + parent = cast(parent, element_type) + + right = cast(parent.__getattr__(field_name).__getattr__('rbe_right'), element_type) + + # avoid using GetValueFromAddress + addr = elt.__getattr__(field_name).__getattr__('rbe_parent')&~1 + elt = value(elt.GetSBValue().CreateValueFromExpression(None,'(void *)'+str(addr))) + elt = cast(elt, element_type) + + class KernelTarget(object): """ A common kernel object that provides access to kernel objects and information. The class holds global lists for task, terminated_tasks, procs, zones, zombroc etc. diff --git a/tools/lldbmacros/net.py b/tools/lldbmacros/net.py index 0c17e7b3b..2c6cfd876 100755 --- a/tools/lldbmacros/net.py +++ b/tools/lldbmacros/net.py @@ -31,11 +31,16 @@ def ShowIfConfiguration(ifnet): """ Display ifconfig-like output for the ifnet """ iface = Cast(ifnet, 'ifnet *') + dlifnet = Cast(ifnet, 'dlil_ifnet *') out_string = "" format_string = "{0: index {3: +#include +#include +#include +#include +#include +#include +#include + +#if !defined(CS_OPS_CLEARPLATFORM) +#define CS_OPS_CLEARPLATFORM 13 +#endif + +#define WINDOW 1 /* seconds */ +#define MAX_ATTEMP_PER_SEC 10 +#define ITER 30 +#define RETRY 5 + +static int +remove_platform_binary(void){ + int ret; + uint32_t my_csflags; + + T_QUIET; T_ASSERT_POSIX_ZERO(csops(getpid(), CS_OPS_STATUS, &my_csflags, sizeof(my_csflags)), NULL); + + if (!(my_csflags & CS_PLATFORM_BINARY)) { + return 0; + } + + ret = csops(getpid(), CS_OPS_CLEARPLATFORM, NULL, 0); + if (ret) { + switch (errno) { + case ENOTSUP: + T_LOG("clearing platform binary not supported, skipping test"); + return -1; + default: + T_LOG("csops failed with flag CS_OPS_CLEARPLATFORM"); + return -1; + } + } + + my_csflags = 0; + T_QUIET; T_ASSERT_POSIX_ZERO(csops(getpid(), CS_OPS_STATUS, &my_csflags, 
sizeof(my_csflags)), NULL); + + if (my_csflags & CS_PLATFORM_BINARY) { + T_LOG("platform binary flag still set"); + return -1; + } + + return 0; +} + +struct all_host_info { + vm_statistics64_data_t host_vm_info64_rev0; + vm_statistics64_data_t host_vm_info64_rev1; + vm_extmod_statistics_data_t host_extmod_info64; + host_load_info_data_t host_load_info; + vm_statistics_data_t host_vm_info_rev0; + vm_statistics_data_t host_vm_info_rev1; + vm_statistics_data_t host_vm_info_rev2; + host_cpu_load_info_data_t host_cpu_load_info; + task_power_info_v2_data_t host_expired_task_info; + task_power_info_v2_data_t host_expired_task_info2; +}; + +static void +check_host_info(struct all_host_info* data, unsigned long iter, char lett){ + char* datap; + unsigned long i,j; + + /* check that for the shorter revisions no data is copied on the bytes of diff with the longer */ + for ( j = 0 ; j < iter; j++) { + datap = (char*) &data[j].host_vm_info64_rev0; + for ( i = (HOST_VM_INFO64_REV0_COUNT * sizeof(int)); i< (HOST_VM_INFO64_REV1_COUNT * sizeof(int)); i++) { + T_QUIET;T_ASSERT_EQ(datap[i], lett, "HOST_VM_INFO64_REV0 byte %lu iter %lu", i, j); + } + + datap = (char*) &data[j].host_vm_info_rev0; + for ( i = (HOST_VM_INFO_REV0_COUNT * sizeof(int)); i< (HOST_VM_INFO_REV2_COUNT * sizeof(int)); i++) { + T_QUIET;T_ASSERT_EQ(datap[i], lett, "HOST_VM_INFO_REV0 byte %lu iter %lu", i, j); + } + + datap = (char*) &data[j].host_vm_info_rev1; + for ( i = (HOST_VM_INFO_REV1_COUNT * sizeof(int)); i< (HOST_VM_INFO_REV2_COUNT * sizeof(int)); i++) { + T_QUIET;T_ASSERT_EQ(datap[i], lett, "HOST_VM_INFO_REV1 byte %lu iter %lu", i, j); + } + + datap = (char*) &data[j].host_expired_task_info; + for ( i = (TASK_POWER_INFO_COUNT * sizeof(int)); i< (TASK_POWER_INFO_V2_COUNT * sizeof(int)); i++) { + T_QUIET;T_ASSERT_EQ(datap[i], lett, "TASK_POWER_INFO_COUNT byte %lu iter %lu", i, j); + } + } + T_LOG("No data overflow"); + + datap = (char*) data; + + /* check that after MAX_ATTEMP_PER_SEC data are all the same 
*/ + for ( i = 0 ; i < sizeof(struct all_host_info) ; i++ ) + for ( j = MAX_ATTEMP_PER_SEC - 1 ; j < iter - 1; j++) { + T_QUIET; T_ASSERT_EQ(datap[i+(j * sizeof(struct all_host_info))], datap[i+((j+1) * sizeof(struct all_host_info))], "all_host_info iter %lu does not match iter %lu", j, j+1); + } + + T_LOG("Data was cached"); +} + +static void +get_host_info(struct all_host_info* data, host_t self, int iter){ + int i; + unsigned int count; + for (i = 0; i < iter; i++){ + count = HOST_VM_INFO64_REV0_COUNT; + T_QUIET;T_ASSERT_POSIX_ZERO(host_statistics64(self, HOST_VM_INFO64, (host_info64_t)&data[i].host_vm_info64_rev0, &count), NULL); + count = HOST_VM_INFO64_REV1_COUNT; + T_QUIET;T_ASSERT_POSIX_ZERO(host_statistics64(self, HOST_VM_INFO64, (host_info64_t)&data[i].host_vm_info64_rev1, &count), NULL); + count = HOST_EXTMOD_INFO64_COUNT; + T_QUIET;T_ASSERT_POSIX_ZERO(host_statistics64(self, HOST_EXTMOD_INFO64, (host_info64_t)&data[i].host_extmod_info64, &count), NULL); + count = HOST_LOAD_INFO_COUNT; + T_QUIET;T_ASSERT_POSIX_ZERO(host_statistics(self, HOST_LOAD_INFO, (host_info_t)&data[i].host_load_info, &count), NULL); + count = HOST_VM_INFO_REV0_COUNT; + T_QUIET;T_ASSERT_POSIX_ZERO(host_statistics(self, HOST_VM_INFO, (host_info_t)&data[i].host_vm_info_rev0, &count), NULL); + count = HOST_VM_INFO_REV1_COUNT; + T_QUIET;T_ASSERT_POSIX_ZERO(host_statistics(self, HOST_VM_INFO, (host_info_t)&data[i].host_vm_info_rev1, &count), NULL); + count = HOST_VM_INFO_REV2_COUNT; + T_QUIET;T_ASSERT_POSIX_ZERO(host_statistics(self, HOST_VM_INFO, (host_info_t)&data[i].host_vm_info_rev2, &count), NULL); + count = HOST_CPU_LOAD_INFO_COUNT; + T_QUIET;T_ASSERT_POSIX_ZERO(host_statistics(self, HOST_CPU_LOAD_INFO, (host_info_t)&data[i].host_cpu_load_info, &count), NULL); + count = TASK_POWER_INFO_COUNT; + T_QUIET;T_ASSERT_POSIX_ZERO(host_statistics(self, HOST_EXPIRED_TASK_INFO, (host_info_t)&data[i].host_expired_task_info, &count), NULL); + count = TASK_POWER_INFO_V2_COUNT; + 
T_QUIET;T_ASSERT_POSIX_ZERO(host_statistics(self, HOST_EXPIRED_TASK_INFO, (host_info_t)&data[i].host_expired_task_info2, &count), NULL); + + } + +} + +T_DECL(test_host_statistics, "testing rate limit for host_statistics", + T_META_CHECK_LEAKS(false), T_META_ALL_VALID_ARCHS(true)) +{ + + unsigned long long start, end, window; + int retry = 0; + host_t self; + char lett = 'a'; + struct all_host_info* data; + mach_timebase_info_data_t timebaseInfo = { 0, 0 }; + + if (remove_platform_binary()) + T_SKIP("Failed to remove platform binary"); + + data = malloc(ITER * sizeof(struct all_host_info)); + T_QUIET;T_ASSERT_NE(data, NULL, "malloc"); + + /* check the size of the data structure against the bytes in COUNT*/ + T_QUIET;T_ASSERT_EQ(sizeof(data[0].host_vm_info64_rev0), HOST_VM_INFO64_COUNT * sizeof(int), "HOST_VM_INFO64_COUNT"); + T_QUIET;T_ASSERT_EQ(sizeof(data[0].host_extmod_info64), HOST_EXTMOD_INFO64_COUNT * sizeof(int), "HOST_EXTMOD_INFO64_COUNT"); + T_QUIET;T_ASSERT_EQ(sizeof(data[0].host_load_info), HOST_LOAD_INFO_COUNT * sizeof(int), "HOST_LOAD_INFO_COUNT"); + T_QUIET;T_ASSERT_EQ(sizeof(data[0].host_vm_info_rev0), HOST_VM_INFO_COUNT * sizeof(int), "HOST_VM_INFO_COUNT"); + T_QUIET;T_ASSERT_EQ(sizeof(data[0].host_cpu_load_info), HOST_CPU_LOAD_INFO_COUNT * sizeof(int), "HOST_CPU_LOAD_INFO_COUNT"); + T_QUIET;T_ASSERT_EQ(sizeof(data[0].host_expired_task_info2), TASK_POWER_INFO_V2_COUNT * sizeof(int), "TASK_POWER_INFO_V2_COUNT"); + + /* check that the latest revision is the COUNT */ + T_QUIET;T_ASSERT_EQ(HOST_VM_INFO64_REV1_COUNT, HOST_VM_INFO64_COUNT, "HOST_VM_INFO64_REV1_COUNT"); + T_QUIET;T_ASSERT_EQ(HOST_VM_INFO_REV2_COUNT, HOST_VM_INFO_COUNT, "HOST_VM_INFO_REV2_COUNT"); + + /* check that the previous revision are smaller than the latest */ + T_QUIET;T_ASSERT_LE(HOST_VM_INFO64_REV0_COUNT, HOST_VM_INFO64_REV1_COUNT, "HOST_VM_INFO64_REV0"); + T_QUIET;T_ASSERT_LE(HOST_VM_INFO_REV0_COUNT, HOST_VM_INFO_REV2_COUNT, "HOST_VM_INFO_REV0_COUNT"); + 
T_QUIET;T_ASSERT_LE(HOST_VM_INFO_REV1_COUNT, HOST_VM_INFO_REV2_COUNT, "HOST_VM_INFO_REV1_COUNT"); + T_QUIET;T_ASSERT_LE(TASK_POWER_INFO_COUNT,TASK_POWER_INFO_V2_COUNT, "TASK_POWER_INFO_COUNT"); + + memset(data, lett, ITER * sizeof(struct all_host_info)); + self = mach_host_self(); + + T_QUIET;T_ASSERT_EQ(mach_timebase_info(&timebaseInfo), KERN_SUCCESS, NULL); + window = (WINDOW * NSEC_PER_SEC * timebaseInfo.denom) / timebaseInfo.numer; + retry = 0; + + /* try to get ITER copies of host_info within window time, in such a way we should hit for sure a cached copy */ + do { + start = mach_continuous_time(); + get_host_info(data, self, ITER); + end = mach_continuous_time(); + retry++; + } while( (end - start > window) && retry <= RETRY); + + if (retry <= RETRY) + check_host_info(data, ITER, lett); + else + T_SKIP("Failed to find window for test"); +} + diff --git a/tools/tests/darwintests/task_info.c b/tools/tests/darwintests/task_info.c index 516bf4a6b..906243a56 100644 --- a/tools/tests/darwintests/task_info.c +++ b/tools/tests/darwintests/task_info.c @@ -1,16 +1,16 @@ +#include +#include +#include #include +#include +#include #include #include -#include #include -#include -#include #include #include -#include -#include #include -#include +#include /* ************************************************************************************* * Test the task_info API. @@ -311,18 +311,30 @@ T_DECL(task_absolutetime_info, "tests task absolute time info", T_META_ASROOT(tr "Tests whether the difference between thread times is greater than the expected range"); #endif - /* - * There is no way of estimating the exact number of threads, hence checking the counter to be non-zero for now. - */ + if (absolute_time_info_data.threads_user <= 0) { + int precise_time_val = 0; + size_t len = sizeof(size_t); + + T_LOG("User threads time is zero. 
This should only happen rarely and when precise_user_time is off"); - T_EXPECT_NE(absolute_time_info_data.threads_user, 0ULL, "task_info should return non-zero number of user threads"); + err = sysctlbyname("kern.precise_user_kernel_time", &precise_time_val, &len, NULL, 0); + + T_EXPECT_POSIX_SUCCESS(err, "performing sysctl to check precise_user_time"); + + T_LOG("kern.precise_user_kernel_time val = %d", precise_time_val); + + T_EXPECT_FALSE(precise_time_val, "user thread time should only be zero when precise_user_kernel_time is disabled"); + } else { + T_PASS("task_info should return non-zero value for user threads time = %llu", absolute_time_info_data.threads_user); + } #if !(defined(__arm__) || defined(__arm64__)) /* * On iOS, system threads are always zero. On OS X this value can be some large positive number. * There is no real way to estimate the exact amount. */ - T_EXPECT_NE(absolute_time_info_data.threads_system, 0ULL, "task_info should return non-zero number of system threads"); + T_EXPECT_NE(absolute_time_info_data.threads_system, 0ULL, + "task_info should return non-zero value for system threads time = %llu", absolute_time_info_data.threads_system); #endif /* diff --git a/tools/tests/darwintests/task_inspect.entitlements b/tools/tests/darwintests/task_inspect.entitlements index d9cb98b6d..eaaf1dedb 100644 --- a/tools/tests/darwintests/task_inspect.entitlements +++ b/tools/tests/darwintests/task_inspect.entitlements @@ -4,5 +4,7 @@ com.apple.system-task-ports + task_for_pid-allow +