From 777415ead45868fa10546e9e75e4973c77c9277e Mon Sep 17 00:00:00 2001 From: Darwin Date: Tue, 11 Mar 2014 19:57:55 +0000 Subject: [PATCH] xnu-2422.90.20 Imported from https://opensource.apple.com/tarballs/xnu/xnu-2422.90.20.tar.gz --- bsd/dev/dtrace/dtrace.c | 4 +- bsd/kern/decmpfs.c | 2 +- bsd/kern/kern_cs.c | 27 +++- bsd/kern/kern_symfile.c | 24 ++- bsd/kern/trace.codes | 6 + bsd/kern/ubc_subr.c | 30 ++++ bsd/net/if_gif.c | 4 +- bsd/netinet/in_arp.c | 2 +- bsd/netinet/in_pcb.c | 4 +- bsd/netinet/tcp_timer.c | 9 +- bsd/netinet6/esp_rijndael.c | 41 ++++- bsd/nfs/nfs_vnops.c | 65 +++++--- bsd/sys/cdefs.h | 5 +- bsd/sys/dtrace.h | 2 +- bsd/sys/signal.h | 2 +- bsd/sys/ubc_internal.h | 3 + bsd/sys/vnode_internal.h | 2 + bsd/vfs/vfs_subr.c | 14 ++ bsd/vfs/vfs_syscalls.c | 22 ++- bsd/vm/vm_compressor_backing_file.c | 2 +- bsd/vm/vm_unix.c | 39 ++--- bsd/vm/vnode_pager.c | 41 +++-- config/MasterVersion | 2 +- config/Private.exports | 1 + config/Private.x86_64.exports | 1 + iokit/IOKit/IOMemoryDescriptor.h | 8 +- iokit/Kernel/IOHibernateIO.cpp | 16 +- iokit/Kernel/IOMemoryDescriptor.cpp | 5 +- iokit/Kernel/IOPMrootDomain.cpp | 2 +- iokit/Kernel/IORegistryEntry.cpp | 89 ++++++----- osfmk/conf/MASTER.x86_64 | 2 +- osfmk/i386/cpuid.c | 1 + osfmk/i386/cpuid.h | 1 + osfmk/i386/lapic.h | 2 + osfmk/i386/lapic_native.c | 23 +++ osfmk/i386/machine_check.c | 226 +++------------------------- osfmk/i386/machine_check.h | 71 --------- osfmk/i386/mp.c | 1 + osfmk/i386/mtrr.c | 59 -------- osfmk/i386/pmCPU.c | 7 +- osfmk/i386/pmap_x86_common.c | 5 + osfmk/i386/postcode.h | 15 +- osfmk/i386/rtclock.c | 9 +- osfmk/i386/tsc.c | 15 ++ osfmk/i386/tsc.h | 1 + osfmk/kdp/kdp.c | 4 + osfmk/kern/startup.c | 8 +- osfmk/kern/task.c | 8 + osfmk/kern/telemetry.c | 9 +- osfmk/kern/thread.c | 6 + osfmk/kern/timer_call.c | 8 +- osfmk/mach/memory_object_types.h | 4 +- osfmk/vm/bsd_vm.c | 28 ++-- osfmk/vm/vm_apple_protect.c | 2 +- osfmk/vm/vm_fault.c | 120 ++++++++++++++- osfmk/vm/vm_options.h | 6 + 
osfmk/vm/vm_page.h | 9 +- osfmk/vm/vm_pageout.c | 52 ++++++- osfmk/vm/vm_protos.h | 25 ++- osfmk/vm/vm_resident.c | 137 +++++++++++++++-- osfmk/vm/vm_shared_region.c | 70 ++++++--- osfmk/vm/vm_shared_region.h | 4 +- tools/lldbmacros/memory.py | 111 ++++++++++++++ 63 files changed, 943 insertions(+), 580 deletions(-) diff --git a/bsd/dev/dtrace/dtrace.c b/bsd/dev/dtrace/dtrace.c index 1a3196990..314ed0b57 100644 --- a/bsd/dev/dtrace/dtrace.c +++ b/bsd/dev/dtrace/dtrace.c @@ -2127,7 +2127,7 @@ dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr) uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg); uint16_t low = DTRACE_LLQUANTIZE_LOW(arg); uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg); - uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEPS(arg); + uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg); llquanta[dtrace_aggregate_llquantize_bucket(factor, low, high, nsteps, nval)] += incr; } @@ -10642,7 +10642,7 @@ dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg); uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg); uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg); - uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEPS(desc->dtad_arg); + uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg); int64_t v; agg->dtag_initial = desc->dtad_arg; diff --git a/bsd/kern/decmpfs.c b/bsd/kern/decmpfs.c index 582298c17..ec784aebd 100644 --- a/bsd/kern/decmpfs.c +++ b/bsd/kern/decmpfs.c @@ -1049,7 +1049,7 @@ commit_upl(upl_t upl, upl_offset_t pl_offset, size_t uplSize, int flags, int abo ErrorLog("ubc_upl_commit_range error %d\n", (int)kr); } else { VerboseLog("committing upl, flags 0x%08x\n", flags | UPL_COMMIT_CLEAR_DIRTY); - kr = ubc_upl_commit_range(upl, pl_offset, uplSize, flags | UPL_COMMIT_CLEAR_DIRTY); + kr = ubc_upl_commit_range(upl, pl_offset, uplSize, flags | UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_WRITTEN_BY_KERNEL); if (kr != KERN_SUCCESS) ErrorLog("ubc_upl_commit_range error %d\n", 
(int)kr); } diff --git a/bsd/kern/kern_cs.c b/bsd/kern/kern_cs.c index 041405044..648aaa100 100644 --- a/bsd/kern/kern_cs.c +++ b/bsd/kern/kern_cs.c @@ -70,6 +70,8 @@ #include +#include + unsigned long cs_procs_killed = 0; unsigned long cs_procs_invalidated = 0; @@ -102,9 +104,15 @@ SYSCTL_INT(_vm, OID_AUTO, cs_enforcement, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_enfor SYSCTL_INT(_vm, OID_AUTO, cs_enforcement_panic, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_enforcement_panic, 0, ""); #endif +int panic_on_cs_killed = 0; void cs_init(void) { +#if MACH_ASSERT + panic_on_cs_killed = 1; +#endif + PE_parse_boot_argn("panic_on_cs_killed", &panic_on_cs_killed, + sizeof (panic_on_cs_killed)); #if !SECURE_KERNEL int disable_cs_enforcement = 0; PE_parse_boot_argn("cs_enforcement_disable", &disable_cs_enforcement, @@ -182,12 +190,25 @@ cs_invalid_page( /* CS_KILL triggers a kill signal, and no you can't have the page. Nothing else. */ if (p->p_csflags & CS_KILL) { + if (panic_on_cs_killed && + vaddr >= SHARED_REGION_BASE && + vaddr < SHARED_REGION_BASE + SHARED_REGION_SIZE) { + panic(" cs_invalid_page(va=0x%llx): killing p=%p\n", (uint64_t) vaddr, p); + } p->p_csflags |= CS_KILLED; cs_procs_killed++; send_kill = 1; retval = 1; } +#if __x86_64__ + if (panic_on_cs_killed && + vaddr >= SHARED_REGION_BASE && + vaddr < SHARED_REGION_BASE + SHARED_REGION_SIZE) { + panic(" cs_invalid_page(va=0x%llx): cs error p=%p\n", (uint64_t) vaddr, p); + } +#endif /* __x86_64__ */ + /* CS_HARD means fail the mapping operation so the process stays valid. */ if (p->p_csflags & CS_HARD) { retval = 1; @@ -214,14 +235,14 @@ cs_invalid_page( NULL ); printf("CODE SIGNING: cs_invalid_page(0x%llx): " - "p=%d[%s] final status 0x%x, %sing page%s\n", + "p=%d[%s] final status 0x%x, %s page%s\n", vaddr, p->p_pid, p->p_comm, p->p_csflags, - retval ? "deny" : "allow (remove VALID)", + retval ? "denying" : "allowing (remove VALID)", send_kill ? 
" sending SIGKILL" : ""); } if (send_kill) - psignal(p, SIGKILL); + threadsignal(current_thread(), SIGKILL, EXC_BAD_ACCESS); return retval; diff --git a/bsd/kern/kern_symfile.c b/bsd/kern/kern_symfile.c index 78c9fa723..062ffa524 100644 --- a/bsd/kern/kern_symfile.c +++ b/bsd/kern/kern_symfile.c @@ -268,20 +268,30 @@ kern_open_file_for_direct_io(const char * name, if (set_file_size && (set_file_size != (off_t) va.va_data_alloc)) { - off_t bytesallocated = 0; u_int32_t alloc_flags = PREALLOCATE | ALLOCATEFROMPEOF | ALLOCATEALL; vnode_lock_spin(ref->vp); CLR(ref->vp->v_flag, VSWAP); vnode_unlock(ref->vp); - error = VNOP_ALLOCATE(ref->vp, set_file_size, alloc_flags, - &bytesallocated, 0 /*fst_offset*/, - ref->ctx); + if (set_file_size < (off_t) va.va_data_alloc) + { + struct vnode_attr setva; + VATTR_INIT(&setva); + VATTR_SET(&setva, va_data_size, set_file_size); + error = vnode_setattr(ref->vp, &setva, ref->ctx); + } + else + { + off_t bytesallocated = set_file_size - va.va_data_alloc; + error = VNOP_ALLOCATE(ref->vp, bytesallocated, alloc_flags, + &bytesallocated, 0 /*fst_offset*/, + ref->ctx); + HIBLOG("VNOP_ALLOCATE(%d) %qd\n", error, bytesallocated); + } // F_SETSIZE: - if (!error) error = vnode_setsize(ref->vp, set_file_size, IO_NOZEROFILL, ref->ctx); - kprintf("vnode_setsize(%d) %qd\n", error, set_file_size); - ref->filelength = bytesallocated; + (void) vnode_setsize(ref->vp, set_file_size, IO_NOZEROFILL, ref->ctx); + ref->filelength = set_file_size; vnode_lock_spin(ref->vp); SET(ref->vp->v_flag, VSWAP); diff --git a/bsd/kern/trace.codes b/bsd/kern/trace.codes index a31308ad2..ef6b4f3c8 100644 --- a/bsd/kern/trace.codes +++ b/bsd/kern/trace.codes @@ -1391,6 +1391,12 @@ 0x531024C CPUPM_PST_QOS_RATEUNLIMIT 0x5310250 CPUPM_PST_QOS_SWITCH 0x5310254 CPUPM_FORCED_IDLE +0x5310258 CPUPM_PST_RAW_PERF +0x531025C CPUPM_CPU_HALT_DEEP +0x5310260 CPUPM_CPU_HALT +0x5310264 CPUPM_CPU_OFFLINE +0x5310268 CPUPM_CPU_EXIT_HALT +0x531026C CPUPM_PST_QOS_CHARGE 0x5330000 HIBERNATE 
0x5330004 HIBERNATE_WRITE_IMAGE 0x5330008 HIBERNATE_MACHINE_INIT diff --git a/bsd/kern/ubc_subr.c b/bsd/kern/ubc_subr.c index 9d3276570..f9ae6591b 100644 --- a/bsd/kern/ubc_subr.c +++ b/bsd/kern/ubc_subr.c @@ -2778,6 +2778,9 @@ ubc_cs_blob_add( const CS_CodeDirectory *cd; off_t blob_start_offset, blob_end_offset; SHA1_CTX sha1ctxt; + boolean_t record_mtime; + + record_mtime = FALSE; blob_handle = IPC_PORT_NULL; @@ -2981,6 +2984,11 @@ ubc_cs_blob_add( goto out; } + if (uip->cs_blobs == NULL) { + /* loading 1st blob: record the file's current "modify time" */ + record_mtime = TRUE; + } + /* * Add this blob to the list of blobs for this vnode. * We always add at the front of the list and we never remove a @@ -3021,6 +3029,10 @@ ubc_cs_blob_add( vnode_unlock(vp); + if (record_mtime) { + vnode_mtime(vp, &uip->cs_mtime, vfs_context_current()); + } + error = 0; /* success ! */ out: @@ -3158,6 +3170,24 @@ ubc_get_cs_blobs( return blobs; } +void +ubc_get_cs_mtime( + struct vnode *vp, + struct timespec *cs_mtime) +{ + struct ubc_info *uip; + + if (! 
UBCINFOEXISTS(vp)) { + cs_mtime->tv_sec = 0; + cs_mtime->tv_nsec = 0; + return; + } + + uip = vp->v_ubcinfo; + cs_mtime->tv_sec = uip->cs_mtime.tv_sec; + cs_mtime->tv_nsec = uip->cs_mtime.tv_nsec; +} + unsigned long cs_validate_page_no_hash = 0; unsigned long cs_validate_page_bad_hash = 0; boolean_t diff --git a/bsd/net/if_gif.c b/bsd/net/if_gif.c index d6608b77c..feb736273 100644 --- a/bsd/net/if_gif.c +++ b/bsd/net/if_gif.c @@ -586,11 +586,11 @@ gif_output( #endif default: error = ENETDOWN; - goto end; + break; } -end: GIF_UNLOCK(sc); +end: if (error) { /* the mbuf was freed either by in_gif_output or in here */ ifnet_stat_increment_out(ifp, 0, 0, 1); diff --git a/bsd/netinet/in_arp.c b/bsd/netinet/in_arp.c index a18147ce2..b8dbd6038 100644 --- a/bsd/netinet/in_arp.c +++ b/bsd/netinet/in_arp.c @@ -532,7 +532,7 @@ arptfree(struct llinfo_arp *la, void *arg) if (rt->rt_expire == 0 || (rt->rt_flags & RTF_STATIC)) { ap->sticky++; /* ARP entry is permanent? */ - if (!(rt->rt_flags & RTF_STATIC)) { + if (rt->rt_expire == 0) { RT_UNLOCK(rt); return; } diff --git a/bsd/netinet/in_pcb.c b/bsd/netinet/in_pcb.c index a3681b8b4..973abc9ac 100644 --- a/bsd/netinet/in_pcb.c +++ b/bsd/netinet/in_pcb.c @@ -1104,7 +1104,7 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr, * interface has been set as a multicast option, use the * address of that interface as our source address. 
*/ - if (error == 0 && IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) && + if (IN_MULTICAST(ntohl(SIN(nam)->sin_addr.s_addr)) && inp->inp_moptions != NULL) { struct ip_moptions *imo; struct ifnet *ifp; @@ -1126,6 +1126,8 @@ in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, struct in_addr *laddr, lck_rw_done(in_ifaddr_rwlock); if (ia == NULL) error = EADDRNOTAVAIL; + else + error = 0; } IMO_UNLOCK(imo); } diff --git a/bsd/netinet/tcp_timer.c b/bsd/netinet/tcp_timer.c index bc2942705..a855e6fa9 100644 --- a/bsd/netinet/tcp_timer.c +++ b/bsd/netinet/tcp_timer.c @@ -520,12 +520,15 @@ tcp_gc(struct inpcbinfo *ipi) * socket lock for better performance. If there are * any pcbs in time-wait, the timer will get rescheduled. * Hence some error in this check can be tolerated. + * + * Sometimes a socket on time-wait queue can be closed if + * 2MSL timer expired but the application still has a + * usecount on it. */ - if (TSTMP_GEQ(tcp_now, tw_tp->t_timer[TCPT_2MSL])) { + if (tw_tp->t_state == TCPS_CLOSED || + TSTMP_GEQ(tcp_now, tw_tp->t_timer[TCPT_2MSL])) { if (tcp_garbage_collect(tw_tp->t_inpcb, 1)) atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1); - } else { - break; } } diff --git a/bsd/netinet6/esp_rijndael.c b/bsd/netinet6/esp_rijndael.c index af5ddff1f..4ca425239 100644 --- a/bsd/netinet6/esp_rijndael.c +++ b/bsd/netinet6/esp_rijndael.c @@ -81,8 +81,8 @@ #include +#define MAX_REALIGN_LEN 2000 #define AES_BLOCKLEN 16 -#define MAX_SBUF_LEN 2000 extern lck_mtx_t *sadb_mutex; @@ -152,7 +152,7 @@ esp_cbc_decrypt_aes(m, off, sav, algo, ivlen) int sn, dn; /* offset from the head of the mbuf, to meat */ size_t ivoff, bodyoff; u_int8_t iv[AES_BLOCKLEN] __attribute__((aligned(4))), *dptr; - u_int8_t sbuf[MAX_SBUF_LEN] __attribute__((aligned(4))), *sp, *sp_unaligned; + u_int8_t sbuf[AES_BLOCKLEN] __attribute__((aligned(4))), *sp, *sp_unaligned, *sp_aligned = NULL; struct mbuf *scut; int scutoff; int i, len; @@ -277,7 +277,15 @@ esp_cbc_decrypt_aes(m, off, sav, algo, ivlen) 
sp_unaligned = NULL; } else { sp_unaligned = sp; - sp = sbuf; + if (len > MAX_REALIGN_LEN) { + return ENOBUFS; + } + if (sp_aligned == NULL) { + sp_aligned = (u_int8_t *)_MALLOC(MAX_REALIGN_LEN, M_SECA, M_DONTWAIT); + if (sp_aligned == NULL) + return ENOMEM; + } + sp = sp_aligned; memcpy(sp, sp_unaligned, len); } // no need to check output pointer alignment @@ -310,6 +318,12 @@ esp_cbc_decrypt_aes(m, off, sav, algo, ivlen) scut->m_len = scutoff; scut->m_next = d0; + // free memory + if (sp_aligned != NULL) { + FREE(sp_aligned, M_SECA); + sp_aligned = NULL; + } + /* just in case */ bzero(iv, sizeof(iv)); bzero(sbuf, sizeof(sbuf)); @@ -332,7 +346,7 @@ esp_cbc_encrypt_aes( int sn, dn; /* offset from the head of the mbuf, to meat */ size_t ivoff, bodyoff; u_int8_t *ivp, *dptr, *ivp_unaligned; - u_int8_t sbuf[MAX_SBUF_LEN] __attribute__((aligned(4))), *sp, *sp_unaligned; + u_int8_t sbuf[AES_BLOCKLEN] __attribute__((aligned(4))), *sp, *sp_unaligned, *sp_aligned = NULL; u_int8_t ivp_aligned_buf[AES_BLOCKLEN] __attribute__((aligned(4))); struct mbuf *scut; int scutoff; @@ -458,7 +472,15 @@ esp_cbc_encrypt_aes( sp_unaligned = NULL; } else { sp_unaligned = sp; - sp = sbuf; + if (len > MAX_REALIGN_LEN) { + return ENOBUFS; + } + if (sp_aligned == NULL) { + sp_aligned = (u_int8_t *)_MALLOC(MAX_REALIGN_LEN, M_SECA, M_DONTWAIT); + if (sp_aligned == NULL) + return ENOMEM; + } + sp = sp_aligned; memcpy(sp, sp_unaligned, len); } // check ivp pointer alignment and use a separate aligned buffer (if ivp is not aligned on 4-byte boundary). 
@@ -467,7 +489,7 @@ esp_cbc_encrypt_aes( } else { ivp_unaligned = ivp; ivp = ivp_aligned_buf; - memcpy(ivp, ivp_unaligned, len); + memcpy(ivp, ivp_unaligned, AES_BLOCKLEN); } // no need to check output pointer alignment aes_encrypt_cbc(sp, ivp, len >> 4, dptr + dn, @@ -494,13 +516,18 @@ esp_cbc_encrypt_aes( soff += s->m_len; s = s->m_next; } - } /* free un-needed source mbufs and add dest mbufs to chain */ m_freem(scut->m_next); scut->m_len = scutoff; scut->m_next = d0; + + // free memory + if (sp_aligned != NULL) { + FREE(sp_aligned, M_SECA); + sp_aligned = NULL; + } /* just in case */ bzero(sbuf, sizeof(sbuf)); diff --git a/bsd/nfs/nfs_vnops.c b/bsd/nfs/nfs_vnops.c index e771822d8..a8c2017b4 100644 --- a/bsd/nfs/nfs_vnops.c +++ b/bsd/nfs/nfs_vnops.c @@ -527,6 +527,19 @@ nfs3_access_rpc(nfsnode_t np, u_int32_t *access, vfs_context_t ctx) return (error); } +/* + * See if our mount is in trouble. Note this is inherently racey. + */ +static int +nfs_notresponding(struct nfsmount *nmp) +{ + int timeoutmask = NFSSTA_TIMEO | NFSSTA_LOCKTIMEO | NFSSTA_JUKEBOXTIMEO; + if (NMFLAG(nmp, MUTEJUKEBOX)) /* jukebox timeouts don't count as unresponsive if muted */ + timeoutmask &= ~NFSSTA_JUKEBOXTIMEO; + + return ((nmp->nm_state & timeoutmask) || !(nmp->nm_sockflags & NMSOCK_READY)); +} + /* * NFS access vnode op. * For NFS version 2, just return ok. File accesses may fail later. @@ -573,21 +586,6 @@ nfs_vnop_access( * in the cache. */ - /* - * In addition if the kernel is checking for access, KAUTH_VNODE_ACCESS - * not set, just return. At this moment do not know what the state of - * the server is and what ever we get back be it either yea or nay is - * going to be stale. Finder (Desktop services/FileURL) might hang when - * going over the wire when just asking getattrlist for the roots FSID - * since we are going to be called to see if we're authorized for - * search. 
Since we are returning without checking the cache and/or - * going over the wire, it makes no sense to update the cache. - * - * N.B. This is also the strategy that SMB is using. - */ - if (!(ap->a_action & KAUTH_VNODE_ACCESS)) - return (0); - /* * Convert KAUTH primitives to NFS access rights. */ @@ -656,12 +654,39 @@ nfs_vnop_access( dorpc = 0; waccess = 0; } else if (NACCESSVALID(np, slot)) { - microuptime(&now); - if ((now.tv_sec < (np->n_accessstamp[slot] + nfs_access_cache_timeout)) && - ((np->n_access[slot] & access) == access)) { + /* + * In addition if the kernel is checking for access, i.e., + * KAUTH_VNODE_ACCESS is not set, and the server does not seem + * to be responding just return if we have something in the + * cache even if its stale for the user. If were granted access + * by the cache and we're a kernel access, then call it good + * enough. We want to avoid having this particular request going + * over the wire causing a hang. This is because at this moment + * we do not know what the state of the server is and what ever + * we get back be it either yea or nay is going to be stale. + * Finder (Desktop services/FileURL) might hang when going over + * the wire when just asking getattrlist for the roots FSID + * since we are going to be called to see if we're authorized + * for search. + * + * N.B. This is also the strategy that SMB is using. 
+ */ + int granted = ((np->n_access[slot] & access) == access); + + if (!(ap->a_action & KAUTH_VNODE_ACCESS)) { + if (granted || nfs_notresponding(nmp)) { + dorpc = 0; + waccess = np->n_access[slot]; + } + } else { + int stale; + microuptime(&now); + stale = (now.tv_sec >= (np->n_accessstamp[slot] + nfs_access_cache_timeout)); + if (granted && !stale) { /* OSAddAtomic(1, &nfsstats.accesscache_hits); */ - dorpc = 0; - waccess = np->n_access[slot]; + dorpc = 0; + waccess = np->n_access[slot]; + } } } nfs_node_unlock(np); diff --git a/bsd/sys/cdefs.h b/bsd/sys/cdefs.h index 08363c1d4..ede0beecd 100644 --- a/bsd/sys/cdefs.h +++ b/bsd/sys/cdefs.h @@ -214,7 +214,10 @@ * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55965 */ -#if __STDC_VERSION__ >= 199901L && (!defined(__GNUC__) || defined(__clang__)) +#if defined(__cplusplus) || \ + (__STDC_VERSION__ >= 199901L && \ + !defined(__GNUC_GNU_INLINE__) && \ + (!defined(__GNUC__) || defined(__clang__))) # define __header_inline inline #elif defined(__GNUC__) && defined(__GNUC_STDC_INLINE__) # define __header_inline extern __inline __attribute__((__gnu_inline__)) diff --git a/bsd/sys/dtrace.h b/bsd/sys/dtrace.h index 5267d5019..4d3fffb84 100644 --- a/bsd/sys/dtrace.h +++ b/bsd/sys/dtrace.h @@ -638,7 +638,7 @@ typedef struct dtrace_difv { (uint16_t)(((x) & DTRACE_LLQUANTIZE_HIGHMASK) >> \ DTRACE_LLQUANTIZE_HIGHSHIFT) -#define DTRACE_LLQUANTIZE_NSTEPS(x) \ +#define DTRACE_LLQUANTIZE_NSTEP(x) \ (uint16_t)(((x) & DTRACE_LLQUANTIZE_NSTEPMASK) >> \ DTRACE_LLQUANTIZE_NSTEPSHIFT) diff --git a/bsd/sys/signal.h b/bsd/sys/signal.h index 49deff231..3aab026e5 100644 --- a/bsd/sys/signal.h +++ b/bsd/sys/signal.h @@ -564,7 +564,7 @@ struct sigstack { sigmask(SIGIOT)|sigmask(SIGEMT)|\ sigmask(SIGFPE)|sigmask(SIGBUS)|\ sigmask(SIGSEGV)|sigmask(SIGSYS)|\ - sigmask(SIGPIPE)) + sigmask(SIGPIPE)|sigmask(SIGKILL)) #define workq_threadmask (threadmask | sigcantmask) diff --git a/bsd/sys/ubc_internal.h b/bsd/sys/ubc_internal.h index 
64a75b853..dbc19de88 100644 --- a/bsd/sys/ubc_internal.h +++ b/bsd/sys/ubc_internal.h @@ -127,6 +127,8 @@ struct ubc_info { struct cl_readahead *cl_rahead; /* cluster read ahead context */ struct cl_writebehind *cl_wbehind; /* cluster write behind context */ + struct timespec cs_mtime; /* modify time of file when + first cs_blob was loaded */ struct cs_blob *cs_blobs; /* for CODE SIGNING */ #if CHECK_CS_VALIDATION_BITMAP void *cs_valid_bitmap; /* right now: used only for signed files on the read-only root volume */ @@ -190,6 +192,7 @@ struct cs_blob; int ubc_cs_blob_add(vnode_t, cpu_type_t, off_t, vm_address_t, off_t, vm_size_t); int ubc_cs_sigpup_add(vnode_t, vm_address_t, vm_size_t); struct cs_blob *ubc_get_cs_blobs(vnode_t); +void ubc_get_cs_mtime(vnode_t, struct timespec *); int ubc_cs_getcdhash(vnode_t, off_t, unsigned char *); kern_return_t ubc_cs_blob_allocate(vm_offset_t *, vm_size_t *); void ubc_cs_blob_deallocate(vm_offset_t, vm_size_t); diff --git a/bsd/sys/vnode_internal.h b/bsd/sys/vnode_internal.h index be2d81c51..e57f25676 100644 --- a/bsd/sys/vnode_internal.h +++ b/bsd/sys/vnode_internal.h @@ -482,6 +482,8 @@ errno_t vnode_resume(vnode_t); errno_t vnode_suspend(vnode_t); +errno_t vnode_mtime(vnode_t, struct timespec *, vfs_context_t); + errno_t vnode_size(vnode_t, off_t *, vfs_context_t); errno_t vnode_setsize(vnode_t, off_t, int ioflag, vfs_context_t); int vnode_setattr_fallback(vnode_t vp, struct vnode_attr *vap, vfs_context_t ctx); diff --git a/bsd/vfs/vfs_subr.c b/bsd/vfs/vfs_subr.c index c7e8afd30..3339f7910 100644 --- a/bsd/vfs/vfs_subr.c +++ b/bsd/vfs/vfs_subr.c @@ -5051,6 +5051,20 @@ vnode_close(vnode_t vp, int flags, vfs_context_t ctx) return (error); } +errno_t +vnode_mtime(vnode_t vp, struct timespec *mtime, vfs_context_t ctx) +{ + struct vnode_attr va; + int error; + + VATTR_INIT(&va); + VATTR_WANTED(&va, va_modify_time); + error = vnode_getattr(vp, &va, ctx); + if (!error) + *mtime = va.va_modify_time; + return error; +} + /* * Returns: 
0 Success * vnode_getattr:??? diff --git a/bsd/vfs/vfs_syscalls.c b/bsd/vfs/vfs_syscalls.c index efd4e324e..4b9996834 100644 --- a/bsd/vfs/vfs_syscalls.c +++ b/bsd/vfs/vfs_syscalls.c @@ -6904,7 +6904,7 @@ vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag, size_t bufsize; void * bufptr; uio_t auio; - struct direntry entry64; + struct direntry *entry64; struct dirent *dep; int bytesread; int error; @@ -6937,23 +6937,28 @@ vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag, dep = (struct dirent *)bufptr; bytesread = bufsize - uio_resid(auio); + MALLOC(entry64, struct direntry *, sizeof(struct direntry), + M_TEMP, M_WAITOK); /* * Convert all the entries and copy them out to user's buffer. */ while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) { + size_t enbufsize = DIRENT64_LEN(dep->d_namlen); + + bzero(entry64, enbufsize); /* Convert a dirent to a dirent64. */ - entry64.d_ino = dep->d_ino; - entry64.d_seekoff = 0; - entry64.d_reclen = DIRENT64_LEN(dep->d_namlen); - entry64.d_namlen = dep->d_namlen; - entry64.d_type = dep->d_type; - bcopy(dep->d_name, entry64.d_name, dep->d_namlen + 1); + entry64->d_ino = dep->d_ino; + entry64->d_seekoff = 0; + entry64->d_reclen = enbufsize; + entry64->d_namlen = dep->d_namlen; + entry64->d_type = dep->d_type; + bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1); /* Move to next entry. */ dep = (struct dirent *)((char *)dep + dep->d_reclen); /* Copy entry64 to user's buffer. */ - error = uiomove((caddr_t)&entry64, entry64.d_reclen, uio); + error = uiomove((caddr_t)entry64, entry64->d_reclen, uio); } /* Update the real offset using the offset we got from VNOP_READDIR. 
*/ @@ -6962,6 +6967,7 @@ vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag, } uio_free(auio); FREE(bufptr, M_TEMP); + FREE(entry64, M_TEMP); return (error); } } diff --git a/bsd/vm/vm_compressor_backing_file.c b/bsd/vm/vm_compressor_backing_file.c index 0c8453eea..fe74a47ea 100644 --- a/bsd/vm/vm_compressor_backing_file.c +++ b/bsd/vm/vm_compressor_backing_file.c @@ -173,7 +173,7 @@ vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flag upl_size_t upl_size = 0; upl_create_flags = UPL_SET_INTERNAL | UPL_SET_LITE; - upl_control_flags = UPL_IOSYNC; + upl_control_flags = UPL_IOSYNC | UPL_PAGING_ENCRYPTED; if ((flags & SWAP_READ) == FALSE) { upl_create_flags |= UPL_COPYOUT_FROM; diff --git a/bsd/vm/vm_unix.c b/bsd/vm/vm_unix.c index 52f21a6ce..1aa660399 100644 --- a/bsd/vm/vm_unix.c +++ b/bsd/vm/vm_unix.c @@ -98,7 +98,7 @@ #include -int _shared_region_map( struct proc*, int, unsigned int, struct shared_file_mapping_np*, memory_object_control_t*, struct shared_file_mapping_np*); +int _shared_region_map_and_slide(struct proc*, int, unsigned int, struct shared_file_mapping_np*, uint32_t, user_addr_t, user_addr_t); int shared_region_copyin_mappings(struct proc*, user_addr_t, unsigned int, struct shared_file_mapping_np *); SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, ""); @@ -1080,13 +1080,14 @@ shared_region_copyin_mappings( * requiring any further setup. 
*/ int -_shared_region_map( +_shared_region_map_and_slide( struct proc *p, int fd, uint32_t mappings_count, struct shared_file_mapping_np *mappings, - memory_object_control_t *sr_file_control, - struct shared_file_mapping_np *mapping_to_slide) + uint32_t slide, + user_addr_t slide_start, + user_addr_t slide_size) { int error; kern_return_t kr; @@ -1248,11 +1249,6 @@ _shared_region_map( goto done; } - if (sr_file_control != NULL) { - *sr_file_control = file_control; - } - - /* get the process's shared region (setup in vm_map_exec()) */ shared_region = vm_shared_region_get(current_task()); @@ -1272,7 +1268,9 @@ _shared_region_map( file_control, file_size, (void *) p->p_fd->fd_rdir, - mapping_to_slide); + slide, + slide_start, + slide_size); if (kr != KERN_SUCCESS) { SHARED_REGION_TRACE_ERROR( ("shared_region: %p [%d(%s)] map(%p:'%s'): " @@ -1351,11 +1349,8 @@ shared_region_map_and_slide_np( struct shared_region_map_and_slide_np_args *uap, __unused int *retvalp) { - struct shared_file_mapping_np mapping_to_slide; struct shared_file_mapping_np *mappings; - unsigned int mappings_count = uap->count; - - memory_object_control_t sr_file_control; + unsigned int mappings_count = uap->count; kern_return_t kr = KERN_SUCCESS; uint32_t slide = uap->slide; @@ -1404,23 +1399,13 @@ shared_region_map_and_slide_np( } - kr = _shared_region_map(p, uap->fd, mappings_count, mappings, &sr_file_control, &mapping_to_slide); + kr = _shared_region_map_and_slide(p, uap->fd, mappings_count, mappings, + slide, + uap->slide_start, uap->slide_size); if (kr != KERN_SUCCESS) { return kr; } - if (slide) { - kr = vm_shared_region_slide(slide, - mapping_to_slide.sfm_file_offset, - mapping_to_slide.sfm_size, - uap->slide_start, - uap->slide_size, - sr_file_control); - if (kr != KERN_SUCCESS) { - vm_shared_region_undo_mappings(NULL, 0, mappings, mappings_count); - return kr; - } - } done: return kr; } diff --git a/bsd/vm/vnode_pager.c b/bsd/vm/vnode_pager.c index 4bc408e87..b90ca5366 100644 --- 
a/bsd/vm/vnode_pager.c +++ b/bsd/vm/vnode_pager.c @@ -123,29 +123,46 @@ vnode_pager_get_filesize(struct vnode *vp) return (vm_object_offset_t) ubc_getsize(vp); } +extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path); + kern_return_t -vnode_pager_get_pathname( +vnode_pager_get_name( struct vnode *vp, char *pathname, - vm_size_t *length_p) + vm_size_t pathname_len, + char *filename, + vm_size_t filename_len, + boolean_t *truncated_path_p) { - int error, len; - - len = (int) *length_p; - error = vn_getpath(vp, pathname, &len); - if (error != 0) { - return KERN_FAILURE; + *truncated_path_p = FALSE; + if (pathname != NULL) { + /* get the path name */ + safe_getpath(vp, NULL, + pathname, (int) pathname_len, + truncated_path_p); + } + if ((pathname == NULL || *truncated_path_p) && + filename != NULL) { + /* get the file name */ + const char *name; + + name = vnode_getname_printable(vp); + strlcpy(filename, name, (size_t) filename_len); + vnode_putname_printable(name); } - *length_p = (vm_size_t) len; return KERN_SUCCESS; } kern_return_t -vnode_pager_get_filename( +vnode_pager_get_mtime( struct vnode *vp, - const char **filename) + struct timespec *current_mtime, + struct timespec *cs_mtime) { - *filename = vp->v_name; + vnode_mtime(vp, current_mtime, vfs_context_current()); + if (cs_mtime != NULL) { + ubc_get_cs_mtime(vp, cs_mtime); + } return KERN_SUCCESS; } diff --git a/config/MasterVersion b/config/MasterVersion index 013405f61..453ceb629 100644 --- a/config/MasterVersion +++ b/config/MasterVersion @@ -1,4 +1,4 @@ -13.0.0 +13.1.0 # The first line of this file contains the master version number for the kernel. # All other instances of the kernel version in xnu are derived from this file. 
diff --git a/config/Private.exports b/config/Private.exports index 96bd0eb11..b36bf13bd 100644 --- a/config/Private.exports +++ b/config/Private.exports @@ -12,6 +12,7 @@ __ZNK24IOCPUInterruptController* __ZTV24IOCPUInterruptController _assert_wait_timeout_with_leeway _assert_wait_deadline_with_leeway +_audio_active _b_to_q _bdevsw _boot diff --git a/config/Private.x86_64.exports b/config/Private.x86_64.exports index 62dbcc51c..0a16bb8a4 100644 --- a/config/Private.x86_64.exports +++ b/config/Private.x86_64.exports @@ -14,6 +14,7 @@ _cpu_to_lapic _cpuid_features _cpuid_info _lapic_end_of_interrupt +_lapic_get_cmci_vector _lapic_unmask_perfcnt_interrupt _mp_broadcast _mp_cpus_call diff --git a/iokit/IOKit/IOMemoryDescriptor.h b/iokit/IOKit/IOMemoryDescriptor.h index 80fb15e23..b1c6b77b3 100644 --- a/iokit/IOKit/IOMemoryDescriptor.h +++ b/iokit/IOKit/IOMemoryDescriptor.h @@ -56,9 +56,11 @@ enum IODirection kIODirectionInOut = kIODirectionIn | kIODirectionOut, // these flags are valid for the prepare() method only - kIODirectionPrepareToPhys32 = 0x00000004, - kIODirectionPrepareNoFault = 0x00000008, - kIODirectionPrepareReserved1 = 0x00000010, + kIODirectionPrepareToPhys32 = 0x00000004, + kIODirectionPrepareNoFault = 0x00000008, + kIODirectionPrepareReserved1 = 0x00000010, +#define IODIRECTIONPREPARENONCOHERENTDEFINED 1 + kIODirectionPrepareNonCoherent = 0x00000020, }; #ifdef __LP64__ typedef IOOptionBits IODirection; diff --git a/iokit/Kernel/IOHibernateIO.cpp b/iokit/Kernel/IOHibernateIO.cpp index 83ab5e703..73963e117 100644 --- a/iokit/Kernel/IOHibernateIO.cpp +++ b/iokit/Kernel/IOHibernateIO.cpp @@ -240,9 +240,6 @@ enum { kVideoMapSize = 32 * 1024 * 1024 }; #define kIOSelectedBootDeviceKey "boot-device" #endif - -enum { kIOHibernateMinPollersNeeded = 2 }; - /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ // copy from phys addr to MD @@ -741,7 +738,8 @@ IOPolledFileOpen( const char * filename, uint64_t setFileSize, IORegistryEntry * 
next; IORegistryEntry * child; - OSData * data; + IOService * service; + OSData * data; vars->pollers = OSArray::withCapacity(4); if (!vars->pollers) @@ -765,6 +763,11 @@ IOPolledFileOpen( const char * filename, uint64_t setFileSize, } else if ((poller = OSDynamicCast(IOPolledInterface, obj))) vars->pollers->setObject(poller); + + if ((service = OSDynamicCast(IOService, next)) + && service->getDeviceMemory() + && !vars->pollers->getCount()) break; + if ((num = OSDynamicCast(OSNumber, next->getProperty(kIOMediaPreferredBlockSizeKey)))) vars->blockSize = num->unsigned32BitValue(); child = next; @@ -775,9 +778,10 @@ IOPolledFileOpen( const char * filename, uint64_t setFileSize, if (vars->blockSize < 4096) vars->blockSize = 4096; HIBLOG("hibernate image major %d, minor %d, blocksize %ld, pollers %d\n", - major(hibernate_image_dev), minor(hibernate_image_dev), (long)vars->blockSize, vars->pollers->getCount()); + major(hibernate_image_dev), minor(hibernate_image_dev), (long)vars->blockSize, + vars->pollers->getCount()); - if (vars->pollers->getCount() < kIOHibernateMinPollersNeeded) + if (!vars->pollers->getCount()) { err = kIOReturnUnsupported; continue; diff --git a/iokit/Kernel/IOMemoryDescriptor.cpp b/iokit/Kernel/IOMemoryDescriptor.cpp index 3eee2e740..d8be44963 100644 --- a/iokit/Kernel/IOMemoryDescriptor.cpp +++ b/iokit/Kernel/IOMemoryDescriptor.cpp @@ -2154,8 +2154,9 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) if (!mapper) uplFlags |= UPL_NEED_32BIT_ADDR; if (dataP->fDMAMapNumAddressBits > 32) dataP->fDMAMapNumAddressBits = 32; } - if (kIODirectionPrepareNoFault & forDirection) uplFlags |= UPL_REQUEST_NO_FAULT; - if (kIODirectionPrepareNoZeroFill & forDirection) uplFlags |= UPL_NOZEROFILLIO; + if (kIODirectionPrepareNoFault & forDirection) uplFlags |= UPL_REQUEST_NO_FAULT; + if (kIODirectionPrepareNoZeroFill & forDirection) uplFlags |= UPL_NOZEROFILLIO; + if (kIODirectionPrepareNonCoherent & forDirection) uplFlags |= 
UPL_REQUEST_FORCE_COHERENCY; mapBase = 0; sharedMem = (ipc_port_t) _memEntry; diff --git a/iokit/Kernel/IOPMrootDomain.cpp b/iokit/Kernel/IOPMrootDomain.cpp index d0c2d7d2c..06c584b75 100644 --- a/iokit/Kernel/IOPMrootDomain.cpp +++ b/iokit/Kernel/IOPMrootDomain.cpp @@ -3872,7 +3872,7 @@ bool IOPMrootDomain::evaluateSystemSleepPolicy( // Current factors based on environment and assertions if (sleepTimerMaintenance) currentFactors |= kIOPMSleepFactorSleepTimerWake; - if (standbyEnabled && sleepToStandby) + if (standbyEnabled && sleepToStandby && !gSleepPolicyHandler) currentFactors |= kIOPMSleepFactorSleepTimerWake; if (!clamshellClosed) currentFactors |= kIOPMSleepFactorLidOpen; diff --git a/iokit/Kernel/IORegistryEntry.cpp b/iokit/Kernel/IORegistryEntry.cpp index faef5cb63..c4bbb3513 100644 --- a/iokit/Kernel/IORegistryEntry.cpp +++ b/iokit/Kernel/IORegistryEntry.cpp @@ -886,7 +886,7 @@ bool IORegistryEntry::getPath( char * path, int * length, OSArray * stack; IORegistryEntry * root; const IORegistryEntry * entry; - IORegistryEntry * parent; + const IORegistryEntry * parent; const OSSymbol * alias; int index; int len, maxLength, compLen, aliasLen; @@ -917,67 +917,62 @@ bool IORegistryEntry::getPath( char * path, int * length, return( ok ); } - entry = this; - parent = entry->getParentEntry( plane ); - if( !parent) - // Error if not attached in plane - return( false); - stack = OSArray::withCapacity( getDepth( plane )); - if( !stack) - return( false); + if (!stack) return( false); RLOCK; + parent = entry = this; root = gRegistryRoot->getChildEntry( plane ); - while( parent && (entry != root)) { + while (parent && (parent != root)) + { // stop below root - stack->setObject( (OSObject *) entry ); entry = parent; parent = entry->getParentEntry( plane ); + stack->setObject( (OSObject *) entry ); } - index = stack->getCount(); - ok = true; - - if( 0 == index) { - - *nextComp++ = '/'; - *nextComp = 0; - len++; - - } else while( ok && ((--index) >= 0)) { - - entry = 
(IORegistryEntry *) stack->getObject((unsigned int) index ); - assert( entry ); - - if( (alias = entry->hasAlias( plane ))) { - len = plane->nameKey->getLength() + 1; - nextComp = path + len; - - compLen = alias->getLength(); - ok = (maxLength > (len + compLen)); - if( ok) - strlcpy( nextComp, alias->getCStringNoCopy(), compLen + 1); - } else { - compLen = maxLength - len; - ok = entry->getPathComponent( nextComp + 1, &compLen, plane ); - - if( ok && compLen) { - compLen++; - *nextComp = '/'; + ok = (0 != parent); + if (ok) + { + index = stack->getCount(); + if( 0 == index) { + + *nextComp++ = '/'; + *nextComp = 0; + len++; + + } else while( ok && ((--index) >= 0)) { + + entry = (IORegistryEntry *) stack->getObject((unsigned int) index ); + assert( entry ); + + if( (alias = entry->hasAlias( plane ))) { + len = plane->nameKey->getLength() + 1; + nextComp = path + len; + + compLen = alias->getLength(); + ok = (maxLength > (len + compLen)); + if( ok) + strlcpy( nextComp, alias->getCStringNoCopy(), compLen + 1); + } else { + compLen = maxLength - len; + ok = entry->getPathComponent( nextComp + 1, &compLen, plane ); + + if( ok && compLen) { + compLen++; + *nextComp = '/'; + } } - } - if( ok) { - len += compLen; - nextComp += compLen; + if( ok) { + len += compLen; + nextComp += compLen; + } } + *length = len; } - *length = len; - UNLOCK; - stack->release(); return( ok ); diff --git a/osfmk/conf/MASTER.x86_64 b/osfmk/conf/MASTER.x86_64 index 2738aba36..14ec832da 100644 --- a/osfmk/conf/MASTER.x86_64 +++ b/osfmk/conf/MASTER.x86_64 @@ -9,7 +9,7 @@ # Standard Apple MacOS X Configurations: # -------- ---- -------- --------------- # -# RELEASE = [ medium intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation config_sleep crypto config_dtrace config_mca config_vmx config_mtrr config_lapic config_counters zleaks config_gzalloc config_sched_traditional 
config_sched_proto config_sched_grrr config_sched_fixedpriority mach_pagemap vm_pressure_events kperf kpc memorystatus config_kext_basement config_telemetry importance_inheritance dynamic_codesigning config_nomonitors ] +# RELEASE = [ medium intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation config_sleep crypto config_dtrace config_mca config_vmx config_mtrr config_lapic config_counters zleaks config_gzalloc config_sched_traditional config_sched_proto config_sched_grrr config_sched_fixedpriority mach_pagemap vm_pressure_events kperf kpc memorystatus config_kext_basement config_telemetry importance_inheritance dynamic_codesigning ] # DEBUG = [ RELEASE osf_debug debug mach_assert task_zone_info ] # # EMBEDDED_BASE = [ bsmall intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation config_sleep crypto ] diff --git a/osfmk/i386/cpuid.c b/osfmk/i386/cpuid.c index 090da7d76..fb171c4dc 100644 --- a/osfmk/i386/cpuid.c +++ b/osfmk/i386/cpuid.c @@ -753,6 +753,7 @@ cpuid_set_cpufamily(i386_cpu_info_t *info_p) cpufamily = CPUFAMILY_INTEL_SANDYBRIDGE; break; case CPUID_MODEL_IVYBRIDGE: + case CPUID_MODEL_IVYBRIDGE_EP: cpufamily = CPUFAMILY_INTEL_IVYBRIDGE; break; case CPUID_MODEL_HASWELL: diff --git a/osfmk/i386/cpuid.h b/osfmk/i386/cpuid.h index 38b5ac7c0..c114c6bce 100644 --- a/osfmk/i386/cpuid.h +++ b/osfmk/i386/cpuid.h @@ -175,6 +175,7 @@ #define CPUID_MODEL_JAKETOWN 0x2D #define CPUID_MODEL_IVYBRIDGE 0x3A #ifdef PRIVATE +#define CPUID_MODEL_IVYBRIDGE_EP 0x3E #define CPUID_MODEL_CRYSTALWELL 0x46 #endif #define CPUID_MODEL_HASWELL 0x3C diff --git a/osfmk/i386/lapic.h b/osfmk/i386/lapic.h index e83240931..54a97c7a5 100644 --- a/osfmk/i386/lapic.h +++ b/osfmk/i386/lapic.h @@ -326,6 +326,8 @@ extern boolean_t lapic_is_interrupting(uint8_t 
vector); extern void lapic_interrupt_counts(uint64_t intrs[256]); extern void lapic_disable_timer(void); +extern uint8_t lapic_get_cmci_vector(void); + #define MAX_LAPICIDS (LAPIC_ID_MAX+1) #ifdef MP_DEBUG #define LAPIC_CPU_MAP_DUMP() lapic_cpu_map_dump() diff --git a/osfmk/i386/lapic_native.c b/osfmk/i386/lapic_native.c index eda81384b..e9983aa79 100644 --- a/osfmk/i386/lapic_native.c +++ b/osfmk/i386/lapic_native.c @@ -954,3 +954,26 @@ lapic_disable_timer(void) } } +/* SPI returning the CMCI vector */ +uint8_t +lapic_get_cmci_vector(void) +{ + uint8_t cmci_vector = 0; +#if CONFIG_MCA + /* CMCI, if available */ + if (mca_is_cmci_present()) + cmci_vector = LAPIC_VECTOR(CMCI); +#endif + return cmci_vector; +} + +#if DEBUG +extern void lapic_trigger_MC(void); +void +lapic_trigger_MC(void) +{ + /* A 64-bit access to any register will do it. */ + volatile uint64_t dummy = *(uint64_t *) (void *) LAPIC_MMIO(ID); + dummy++; +} +#endif diff --git a/osfmk/i386/machine_check.c b/osfmk/i386/machine_check.c index 63aa09824..3e1a8cc7f 100644 --- a/osfmk/i386/machine_check.c +++ b/osfmk/i386/machine_check.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -55,10 +56,6 @@ static boolean_t mca_MCA_present = FALSE; static uint32_t mca_family = 0; static unsigned int mca_error_bank_count = 0; static boolean_t mca_control_MSR_present = FALSE; -static boolean_t mca_threshold_status_present = FALSE; -static boolean_t mca_sw_error_recovery_present = FALSE; -static boolean_t mca_extended_MSRs_present = FALSE; -static unsigned int mca_extended_MSRs_count = 0; static boolean_t mca_cmci_present = FALSE; static ia32_mcg_cap_t ia32_mcg_cap; decl_simple_lock_data(static, mca_lock); @@ -109,13 +106,7 @@ mca_get_availability(void) ia32_mcg_cap.u64 = rdmsr64(IA32_MCG_CAP); mca_error_bank_count = ia32_mcg_cap.bits.count; mca_control_MSR_present = ia32_mcg_cap.bits.mcg_ctl_p; - mca_threshold_status_present = ia32_mcg_cap.bits.mcg_tes_p; - 
mca_sw_error_recovery_present = ia32_mcg_cap.bits.mcg_ser_p; mca_cmci_present = ia32_mcg_cap.bits.mcg_ext_corr_err_p; - if (family == 0x0F) { - mca_extended_MSRs_present = ia32_mcg_cap.bits.mcg_ext_p; - mca_extended_MSRs_count = ia32_mcg_cap.bits.mcg_ext_cnt; - } } } @@ -234,7 +225,7 @@ mca_save_state(mca_state_t *mca_state) * and don't care about races */ if (x86_package()->mca_state == NULL) - x86_package()->mca_state = mca_state; + x86_package()->mca_state = mca_state; mca_state->mca_is_saved = TRUE; } @@ -246,30 +237,6 @@ mca_check_save(void) mca_save_state(current_cpu_datap()->cpu_mca_state); } -static void mca_dump_64bit_state(void) -{ - kdb_printf("Extended Machine Check State:\n"); - kdb_printf(" IA32_MCG_RAX: 0x%016qx\n", rdmsr64(IA32_MCG_RAX)); - kdb_printf(" IA32_MCG_RBX: 0x%016qx\n", rdmsr64(IA32_MCG_RBX)); - kdb_printf(" IA32_MCG_RCX: 0x%016qx\n", rdmsr64(IA32_MCG_RCX)); - kdb_printf(" IA32_MCG_RDX: 0x%016qx\n", rdmsr64(IA32_MCG_RDX)); - kdb_printf(" IA32_MCG_RSI: 0x%016qx\n", rdmsr64(IA32_MCG_RSI)); - kdb_printf(" IA32_MCG_RDI: 0x%016qx\n", rdmsr64(IA32_MCG_RDI)); - kdb_printf(" IA32_MCG_RBP: 0x%016qx\n", rdmsr64(IA32_MCG_RBP)); - kdb_printf(" IA32_MCG_RSP: 0x%016qx\n", rdmsr64(IA32_MCG_RSP)); - kdb_printf(" IA32_MCG_RFLAGS: 0x%016qx\n", rdmsr64(IA32_MCG_RFLAGS)); - kdb_printf(" IA32_MCG_RIP: 0x%016qx\n", rdmsr64(IA32_MCG_RIP)); - kdb_printf(" IA32_MCG_MISC: 0x%016qx\n", rdmsr64(IA32_MCG_MISC)); - kdb_printf(" IA32_MCG_R8: 0x%016qx\n", rdmsr64(IA32_MCG_R8)); - kdb_printf(" IA32_MCG_R9: 0x%016qx\n", rdmsr64(IA32_MCG_R9)); - kdb_printf(" IA32_MCG_R10: 0x%016qx\n", rdmsr64(IA32_MCG_R10)); - kdb_printf(" IA32_MCG_R11: 0x%016qx\n", rdmsr64(IA32_MCG_R11)); - kdb_printf(" IA32_MCG_R12: 0x%016qx\n", rdmsr64(IA32_MCG_R12)); - kdb_printf(" IA32_MCG_R13: 0x%016qx\n", rdmsr64(IA32_MCG_R13)); - kdb_printf(" IA32_MCG_R14: 0x%016qx\n", rdmsr64(IA32_MCG_R14)); - kdb_printf(" IA32_MCG_R15: 0x%016qx\n", rdmsr64(IA32_MCG_R15)); -} - static void mca_report_cpu_info(void) 
{ @@ -280,91 +247,13 @@ mca_report_cpu_info(void) infop->cpuid_model, infop->cpuid_stepping, infop->cpuid_microcode_version); - kdb_printf(" %s\n", infop->cpuid_brand_string); -} - -static const char *mc8_memory_operation[] = { - [MC8_MMM_GENERIC] = "generic", - [MC8_MMM_READ] = "read", - [MC8_MMM_WRITE] = "write", - [MC8_MMM_ADDRESS_COMMAND] = "address/command", - [MC8_MMM_RESERVED] = "reserved" -}; - -static void -mca_dump_bank_mc8(mca_state_t *state, int i) -{ - mca_mci_bank_t *bank; - ia32_mci_status_t status; - struct ia32_mc8_specific mc8; - int mmm; - - bank = &state->mca_error_bank[i]; - status = bank->mca_mci_status; - mc8 = status.bits_mc8; - mmm = MIN(mc8.memory_operation, MC8_MMM_RESERVED); + kdb_printf(" signature: 0x%x\n", + infop->cpuid_signature); + kdb_printf(" %s\n", + infop->cpuid_brand_string); - kdb_printf( - " IA32_MC%d_STATUS(0x%x): 0x%016qx %svalid\n", - i, IA32_MCi_STATUS(i), status.u64, IF(!status.bits.val, "in")); - if (!status.bits.val) - return; - - kdb_printf( - " Channel number: %d%s\n" - " Memory Operation: %s\n" - " Machine-specific error: %s%s%s%s%s%s%s%s%s\n" - " COR_ERR_CNT: %d\n", - mc8.channel_number, - IF(mc8.channel_number == 15, " (unknown)"), - mc8_memory_operation[mmm], - IF(mc8.read_ecc, "Read ECC "), - IF(mc8.ecc_on_a_scrub, "ECC on scrub "), - IF(mc8.write_parity, "Write parity "), - IF(mc8.redundant_memory, "Redundant memory "), - IF(mc8.sparing, "Sparing/Resilvering "), - IF(mc8.access_out_of_range, "Access out of Range "), - IF(mc8.rtid_out_of_range, "RTID out of Range "), - IF(mc8.address_parity, "Address Parity "), - IF(mc8.byte_enable_parity, "Byte Enable Parity "), - mc8.cor_err_cnt); - kdb_printf( - " Status bits:\n%s%s%s%s%s%s", - IF(status.bits.pcc, " Processor context corrupt\n"), - IF(status.bits.addrv, " ADDR register valid\n"), - IF(status.bits.miscv, " MISC register valid\n"), - IF(status.bits.en, " Error enabled\n"), - IF(status.bits.uc, " Uncorrected error\n"), - IF(status.bits.over, " Error 
overflow\n")); - if (status.bits.addrv) - kdb_printf( - " IA32_MC%d_ADDR(0x%x): 0x%016qx\n", - i, IA32_MCi_ADDR(i), bank->mca_mci_addr); - if (status.bits.miscv) { - ia32_mc8_misc_t mc8_misc; - - mc8_misc.u64 = bank->mca_mci_misc; - kdb_printf( - " IA32_MC%d_MISC(0x%x): 0x%016qx\n" - " RTID: %d\n" - " DIMM: %d\n" - " Channel: %d\n" - " Syndrome: 0x%x\n", - i, IA32_MCi_MISC(i), mc8_misc.u64, - mc8_misc.bits.rtid, - mc8_misc.bits.dimm, - mc8_misc.bits.channel, - (int) mc8_misc.bits.syndrome); - } } -static const char *mca_threshold_status[] = { - [THRESHOLD_STATUS_NO_TRACKING] = "No tracking", - [THRESHOLD_STATUS_GREEN] = "Green", - [THRESHOLD_STATUS_YELLOW] = "Yellow", - [THRESHOLD_STATUS_RESERVED] = "Reserved" -}; - static void mca_dump_bank(mca_state_t *state, int i) { @@ -373,54 +262,18 @@ mca_dump_bank(mca_state_t *state, int i) bank = &state->mca_error_bank[i]; status = bank->mca_mci_status; - kdb_printf( - " IA32_MC%d_STATUS(0x%x): 0x%016qx %svalid\n", - i, IA32_MCi_STATUS(i), status.u64, IF(!status.bits.val, "in")); if (!status.bits.val) return; - kdb_printf( - " MCA error code: 0x%04x\n", - status.bits.mca_error); - kdb_printf( - " Model specific error code: 0x%04x\n", - status.bits.model_specific_error); - if (!mca_threshold_status_present) { - kdb_printf( - " Other information: 0x%08x\n", - status.bits.other_information); - } else { - int threshold = status.bits_tes_p.threshold; - kdb_printf( - " Other information: 0x%08x\n" - " Threshold-based status: %s\n", - status.bits_tes_p.other_information, - (status.bits_tes_p.uc == 0) ? 
- mca_threshold_status[threshold] : - "Undefined"); - } - if (mca_threshold_status_present && - mca_sw_error_recovery_present) { - kdb_printf( - " Software Error Recovery:\n%s%s", - IF(status.bits_tes_p.ar, " Recovery action reqd\n"), - IF(status.bits_tes_p.s, " Signaling UCR error\n")); - } - kdb_printf( - " Status bits:\n%s%s%s%s%s%s", - IF(status.bits.pcc, " Processor context corrupt\n"), - IF(status.bits.addrv, " ADDR register valid\n"), - IF(status.bits.miscv, " MISC register valid\n"), - IF(status.bits.en, " Error enabled\n"), - IF(status.bits.uc, " Uncorrected error\n"), - IF(status.bits.over, " Error overflow\n")); + kdb_printf(" IA32_MC%d_STATUS(0x%x): 0x%016qx\n", + i, IA32_MCi_STATUS(i), status.u64); + if (status.bits.addrv) - kdb_printf( - " IA32_MC%d_ADDR(0x%x): 0x%016qx\n", + kdb_printf(" IA32_MC%d_ADDR(0x%x): 0x%016qx\n", i, IA32_MCi_ADDR(i), bank->mca_mci_addr); + if (status.bits.miscv) - kdb_printf( - " IA32_MC%d_MISC(0x%x): 0x%016qx\n", + kdb_printf(" IA32_MC%d_MISC(0x%x): 0x%016qx\n", i, IA32_MCi_MISC(i), bank->mca_mci_misc); } @@ -432,19 +285,7 @@ mca_cpu_dump_error_banks(mca_state_t *state) if (!state->mca_is_valid) return; - kdb_printf("MCA error-reporting registers:\n"); for (i = 0; i < mca_error_bank_count; i++ ) { - if (i == 8 && state == x86_package()->mca_state) { - /* - * Fatal Memory Error - */ - - /* Dump MC8 for this package */ - kdb_printf(" Package %d logged:\n", - x86_package()->ppkg_num); - mca_dump_bank_mc8(state, 8); - continue; - } mca_dump_bank(state, i); } } @@ -491,22 +332,11 @@ mca_dump(void) /* * Report machine-check capabilities: */ - kdb_printf( - "Machine-check capabilities 0x%016qx:\n", ia32_mcg_cap.u64); + kdb_printf("Machine-check capabilities: 0x%016qx\n", ia32_mcg_cap.u64); mca_report_cpu_info(); - kdb_printf( - " %d error-reporting banks\n%s%s%s", mca_error_bank_count, - IF(mca_control_MSR_present, - " control MSR present\n"), - IF(mca_threshold_status_present, - " threshold-based error status present\n"), - 
IF(mca_cmci_present, - " extended corrected memory error handling present\n")); - if (mca_extended_MSRs_present) - kdb_printf( - " %d extended MSRs present\n", mca_extended_MSRs_count); + kdb_printf(" %d error-reporting banks\n", mca_error_bank_count); /* * Dump all processor state: @@ -515,45 +345,29 @@ mca_dump(void) mca_state_t *mcsp = cpu_datap(i)->cpu_mca_state; ia32_mcg_status_t status; - kdb_printf("Processor %d: ", i); if (mcsp == NULL || mcsp->mca_is_saved == FALSE || - mcsp->mca_mcg_status.u64 == 0) { - kdb_printf("no machine-check status reported\n"); - continue; - } - if (!mcsp->mca_is_valid) { - kdb_printf("no valid machine-check state\n"); + mcsp->mca_mcg_status.u64 == 0 || + !mcsp->mca_is_valid) { continue; } status = mcsp->mca_mcg_status; - kdb_printf( - "machine-check status 0x%016qx:\n%s%s%s", status.u64, - IF(status.bits.ripv, " restart IP valid\n"), - IF(status.bits.eipv, " error IP valid\n"), - IF(status.bits.mcip, " machine-check in progress\n")); - + kdb_printf("Processor %d: IA32_MCG_STATUS: 0x%016qx\n", + i, status.u64); mca_cpu_dump_error_banks(mcsp); } - /* - * Dump any extended machine state: - */ - if (mca_extended_MSRs_present) { - mca_dump_64bit_state(); - } - /* Update state to release any other threads. 
*/ mca_dump_state = DUMPED; } extern void mca_exception_panic(void); -extern void mtrr_lapic_cached(void); +extern void lapic_trigger_MC(void); void mca_exception_panic(void) { #if DEBUG - mtrr_lapic_cached(); + lapic_trigger_MC(); #else kprintf("mca_exception_panic() requires DEBUG build\n"); #endif diff --git a/osfmk/i386/machine_check.h b/osfmk/i386/machine_check.h index 70c75c826..138122596 100644 --- a/osfmk/i386/machine_check.h +++ b/osfmk/i386/machine_check.h @@ -144,22 +144,6 @@ typedef union { uint64_t over :BIT1(62); uint64_t val :BIT1(63); } bits_tes_p; - struct ia32_mc8_specific { - uint64_t channel_number :BITS(3,0); - uint64_t memory_operation :BITS(6,4); - uint64_t unused :BITS(15,7); - uint64_t read_ecc :BIT1(16); - uint64_t ecc_on_a_scrub :BIT1(17); - uint64_t write_parity :BIT1(18); - uint64_t redundant_memory :BIT1(19); - uint64_t sparing :BIT1(20); - uint64_t access_out_of_range :BIT1(21); - uint64_t rtid_out_of_range :BIT1(22); - uint64_t address_parity :BIT1(23); - uint64_t byte_enable_parity :BIT1(24); - uint64_t reserved :BITS(37,25); - uint64_t cor_err_cnt :BITS(52,38); - } bits_mc8; uint64_t u64; } ia32_mci_status_t; @@ -169,64 +153,9 @@ typedef union { #define THRESHOLD_STATUS_YELLOW 2 #define THRESHOLD_STATUS_RESERVED 3 -/* MC8 memory operations encoding: */ -#define MC8_MMM_GENERIC 0 -#define MC8_MMM_READ 1 -#define MC8_MMM_WRITE 2 -#define MC8_MMM_ADDRESS_COMMAND 3 -#define MC8_MMM_RESERVED 4 -typedef union { - struct { - uint64_t rtid :BITS(7,0); - uint64_t reserved1 :BITS(15,8); - uint64_t dimm :BITS(17,16); - uint64_t channel :BITS(19,18); - uint64_t reserved2 :BITS(31,20); - uint64_t syndrome :BITS(63,32); - } bits; - uint64_t u64; -} ia32_mc8_misc_t; - typedef uint64_t ia32_mci_addr_t; typedef uint64_t ia32_mci_misc_t; -#define IA32_MCG_EAX (0x180) -#define IA32_MCG_EBX (0x181) -#define IA32_MCG_ECX (0x182) -#define IA32_MCG_EDX (0x183) -#define IA32_MCG_ESI (0x184) -#define IA32_MCG_EDI (0x185) -#define IA32_MCG_EBP (0x186) 
-#define IA32_MCG_ESP (0x187) -#define IA32_MCG_EFLAGS (0x188) -#define IA32_MCG_EIP (0x189) -#define IA32_MCG_MISC (0x18A) - -#define IA32_MCG_RAX (0x180) -#define IA32_MCG_RBX (0x181) -#define IA32_MCG_RCX (0x182) -#define IA32_MCG_RDX (0x183) -#define IA32_MCG_RSI (0x184) -#define IA32_MCG_RDI (0x185) -#define IA32_MCG_RBP (0x186) -#define IA32_MCG_RSP (0x187) -#define IA32_MCG_RFLAGS (0x188) -#define IA32_MCG_RIP (0x189) -#define IA32_MCG_MISC (0x18A) -#define IA32_MCG_RESERVED1 (0x18B) -#define IA32_MCG_RESERVED2 (0x18C) -#define IA32_MCG_RESERVED3 (0x18D) -#define IA32_MCG_RESERVED4 (0x18E) -#define IA32_MCG_RESERVED5 (0x18F) -#define IA32_MCG_R8 (0x190) -#define IA32_MCG_R9 (0x191) -#define IA32_MCG_R10 (0x192) -#define IA32_MCG_R11 (0x193) -#define IA32_MCG_R12 (0x194) -#define IA32_MCG_R13 (0x195) -#define IA32_MCG_R14 (0x196) -#define IA32_MCG_R15 (0x197) - extern void mca_cpu_alloc(cpu_data_t *cdp); extern void mca_cpu_init(void); extern void mca_dump(void); diff --git a/osfmk/i386/mp.c b/osfmk/i386/mp.c index dc50b4848..50c755b54 100644 --- a/osfmk/i386/mp.c +++ b/osfmk/i386/mp.c @@ -593,6 +593,7 @@ NMIInterruptHandler(x86_saved_state_t *regs) if (pmsafe_debug && !kdp_snapshot) pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE); current_cpu_datap()->cpu_NMI_acknowledged = TRUE; + i_bit_clear(MP_KDP, &current_cpu_datap()->cpu_signals); mp_kdp_wait(FALSE, pmap_tlb_flush_timeout || spinlock_timed_out || panic_active()); if (pmsafe_debug && !kdp_snapshot) pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL); diff --git a/osfmk/i386/mtrr.c b/osfmk/i386/mtrr.c index 0978551b2..30df3db47 100644 --- a/osfmk/i386/mtrr.c +++ b/osfmk/i386/mtrr.c @@ -694,62 +694,3 @@ pat_init(void) } ml_set_interrupts_enabled(istate); } - -#if DEBUG -void -mtrr_lapic_cached(void); -void -mtrr_lapic_cached(void) -{ - boolean_t istate; - uint32_t lo; - uint32_t hi; - uint64_t lapic_pbase; - uint64_t base; - uint64_t length; - uint32_t type; - unsigned int i; - - /* Find the local APIC 
physical base address */ - rdmsr(MSR_IA32_APIC_BASE, lo, hi); - lapic_pbase = (lo & MSR_IA32_APIC_BASE_BASE); - - DBG("mtrr_lapic_cached() on cpu %d, lapic_pbase: 0x%016llx\n", - get_cpu_number(), lapic_pbase); - - istate = ml_set_interrupts_enabled(FALSE); - - /* - * Search for the variable range MTRR mapping the lapic. - * Flip its type to WC and return. - */ - for (i = 0; i < mtrr_state.var_count; i++) { - if (!(mtrr_state.var_range[i].mask & IA32_MTRR_PHYMASK_VALID)) - continue; - base = mtrr_state.var_range[i].base & IA32_MTRR_PHYSBASE_MASK; - type = (uint32_t)(mtrr_state.var_range[i].base & IA32_MTRR_PHYSBASE_TYPE); - length = MASK_TO_LEN(mtrr_state.var_range[i].mask); - DBG("%d: base: 0x%016llx size: 0x%016llx type: %d\n", - i, base, length, type); - if (base <= lapic_pbase && - lapic_pbase <= base + length - PAGE_SIZE) { - DBG("mtrr_lapic_cached() matched var: %d\n", i); - mtrr_state.var_range[i].base &=~IA32_MTRR_PHYSBASE_TYPE; - mtrr_state.var_range[i].base |= MTRR_TYPE_WRITECOMBINE; - ml_set_interrupts_enabled(istate); - } - } - - /* - * In case we didn't find a covering variable range, - * we slam WC into the default memory type. - */ - mtrr_state.MTRRdefType = MTRR_TYPE_WRITECOMBINE; - - mtrr_update_cpu(); - - ml_set_interrupts_enabled(istate); - - return; -} -#endif /* DEBUG */ diff --git a/osfmk/i386/pmCPU.c b/osfmk/i386/pmCPU.c index a4b8c62e6..1efffe69c 100644 --- a/osfmk/i386/pmCPU.c +++ b/osfmk/i386/pmCPU.c @@ -569,12 +569,7 @@ ml_get_maxbusdelay(void) } /* - * Set the maximum delay time allowed for snoop on the bus. - * - * Note that this value will be compared to the amount of time that it takes - * to transition from a non-snooping power state (C4) to a snooping state (C2). - * If maxBusDelay is less than C4C2SnoopDelay, - * we will not enter the lowest power state. 
+ * Advertise a memory access latency tolerance of "mdelay" ns */ void ml_set_maxbusdelay(uint32_t mdelay) diff --git a/osfmk/i386/pmap_x86_common.c b/osfmk/i386/pmap_x86_common.c index 65291caf3..cc584a9a1 100644 --- a/osfmk/i386/pmap_x86_common.c +++ b/osfmk/i386/pmap_x86_common.c @@ -614,6 +614,10 @@ pmap_enter_options( } if (superpage) /* this path can not be used */ template |= INTEL_PTE_PS; /* to change the page size! */ + + if (old_attributes == template) + goto dont_update_pte; + /* Determine delta, PV locked */ need_tlbflush = ((old_attributes ^ template) != INTEL_PTE_WIRED); @@ -629,6 +633,7 @@ pmap_enter_options( opte = *pte; npte = template | (opte & (INTEL_PTE_REF | INTEL_PTE_MOD)); } while (!pmap_cmpx_pte(pte, opte, npte)); +dont_update_pte: if (old_pa_locked) { UNLOCK_PVH(pai); old_pa_locked = FALSE; diff --git a/osfmk/i386/postcode.h b/osfmk/i386/postcode.h index 9440fcbd0..d2a859354 100644 --- a/osfmk/i386/postcode.h +++ b/osfmk/i386/postcode.h @@ -29,6 +29,13 @@ #ifndef _I386_POSTCODE_H_ #define _I386_POSTCODE_H_ +/* + * Postcodes are no longer enabled by default in the DEBUG kernel + * because platforms may not have builtin port 0x80 support. + * To re-enable postcode outpout, uncomment the following define: + */ +//#define DEBUG_POSTCODE 1 + /* Define this to delay about 1 sec after posting each code */ //#define POSTCODE_DELAY 1 @@ -38,7 +45,7 @@ #define SPINCOUNT 300000000 #define CPU_PAUSE() rep; nop -#if DEBUG +#if DEBUG_POSTCODE /* * Macro to output byte value to postcode, destoying register al. * Additionally, if POSTCODE_DELAY, spin for about a second. 
@@ -101,14 +108,14 @@ movl %ebx, %eax; \ POSTCODE_AL -#else /* DEBUG */ +#else /* DEBUG_POSTCODE */ #define POSTCODE_AL #define POSTCODE_AX #define POSTCODE(X) #define POSTCODE2(X) #define POSTCODE_SAVE_EAX(X) #define POSTCODE32_EBX -#endif /* DEBUG */ +#endif /* DEBUG_POSTCODE */ /* * The following postcodes are defined for stages of early startup: @@ -174,7 +181,7 @@ _postcode2(uint16_t xxxx) { asm volatile("outw %0, %1" : : "a" (xxxx), "N" (POSTPORT)); } -#if DEBUG +#if DEBUG_POSTCODE inline static void postcode(uint8_t xx) { diff --git a/osfmk/i386/rtclock.c b/osfmk/i386/rtclock.c index 9935839d9..415ee92fd 100644 --- a/osfmk/i386/rtclock.c +++ b/osfmk/i386/rtclock.c @@ -345,8 +345,15 @@ rtc_set_timescale(uint64_t cycles) rntp->shift = shift; + /* + * On some platforms, the TSC is not reset at warm boot. But the + * rebase time must be relative to the current boot so we can't use + * mach_absolute_time(). Instead, we convert the TSC delta since boot + * to nanoseconds. + */ if (tsc_rebase_abs_time == 0) - tsc_rebase_abs_time = mach_absolute_time(); + tsc_rebase_abs_time = _rtc_tsc_to_nanoseconds( + rdtsc64() - tsc_at_boot, rntp); rtc_nanotime_init(0); } diff --git a/osfmk/i386/tsc.c b/osfmk/i386/tsc.c index f8f4fd665..f79a8fcb2 100644 --- a/osfmk/i386/tsc.c +++ b/osfmk/i386/tsc.c @@ -76,6 +76,7 @@ uint32_t flex_ratio = 0; uint32_t flex_ratio_min = 0; uint32_t flex_ratio_max = 0; +uint64_t tsc_at_boot = 0; #define bit(n) (1ULL << (n)) #define bitmask(h,l) ((bit(h)|(bit(h)-1)) & ~(bit(l)-1)) @@ -91,8 +92,10 @@ uint32_t flex_ratio_max = 0; #define CPU_FAMILY_PENTIUM_M (0x6) static const char FSB_Frequency_prop[] = "FSBFrequency"; +static const char TSC_at_boot_prop[] = "InitialTSC"; /* * This routine extracts the bus frequency in Hz from the device tree. + * Also reads any initial TSC value at boot from the device tree. 
*/ static uint64_t EFI_FSB_frequency(void) @@ -122,6 +125,18 @@ EFI_FSB_frequency(void) } else { kprintf("EFI_FSB_frequency: unexpected size %d\n", size); } + + /* + * While we're here, see if EFI published an initial TSC value. + */ + if (DTGetProperty(entry,TSC_at_boot_prop,&value,&size) == kSuccess) { + if (size == sizeof(uint64_t)) { + tsc_at_boot = *(uint64_t *) value; + kprintf("EFI_FSB_frequency: read %s value: %llu\n", + TSC_at_boot_prop, tsc_at_boot); + } + } + return frequency; } diff --git a/osfmk/i386/tsc.h b/osfmk/i386/tsc.h index 42bb9684d..5595d3870 100644 --- a/osfmk/i386/tsc.h +++ b/osfmk/i386/tsc.h @@ -56,6 +56,7 @@ extern uint64_t busFreq; extern uint32_t flex_ratio; extern uint32_t flex_ratio_min; extern uint32_t flex_ratio_max; +extern uint64_t tsc_at_boot; struct tscInfo { diff --git a/osfmk/kdp/kdp.c b/osfmk/kdp/kdp.c index 02f2d363c..817b94829 100644 --- a/osfmk/kdp/kdp.c +++ b/osfmk/kdp/kdp.c @@ -1295,6 +1295,10 @@ kdp_stackshot(int pid, void *tracebuf, uint32_t tracebuf_size, uint32_t trace_fl if(task->pidsuspended) task_snap->ss_flags |= kPidSuspended; if(task->frozen) task_snap->ss_flags |= kFrozen; + if (task->effective_policy.darwinbg == 1) { + task_snap->ss_flags |= kTaskDarwinBG; + } + if (task->effective_policy.t_sup_active == 1) task_snap->ss_flags |= kTaskIsSuppressed; diff --git a/osfmk/kern/startup.c b/osfmk/kern/startup.c index 30d2bbff2..57aa2bd4d 100644 --- a/osfmk/kern/startup.c +++ b/osfmk/kern/startup.c @@ -167,6 +167,10 @@ int turn_on_log_leaks = 0; void kernel_early_bootstrap(void) { + /* serverperfmode is needed by timer setup */ + if (PE_parse_boot_argn("serverperfmode", &serverperfmode, sizeof (serverperfmode))) { + serverperfmode = 1; + } lck_mod_init(); @@ -195,10 +199,6 @@ kernel_bootstrap(void) PE_parse_boot_argn("trace_wake", &wake_nkdbufs, sizeof (wake_nkdbufs)); - /* i386_vm_init already checks for this ; do it aagin anyway */ - if (PE_parse_boot_argn("serverperfmode", &serverperfmode, sizeof (serverperfmode))) 
{ - serverperfmode = 1; - } scale_setup(); kernel_bootstrap_kprintf("calling vm_mem_bootstrap\n"); diff --git a/osfmk/kern/task.c b/osfmk/kern/task.c index d39ed2047..aa5d2b360 100644 --- a/osfmk/kern/task.c +++ b/osfmk/kern/task.c @@ -161,6 +161,9 @@ lck_attr_t task_lck_attr; lck_grp_t task_lck_grp; lck_grp_attr_t task_lck_grp_attr; +/* Flag set by core audio when audio is playing. Used to stifle EXC_RESOURCE generation when active. */ +int audio_active = 0; + zinfo_usage_store_t tasks_tkm_private; zinfo_usage_store_t tasks_tkm_shared; @@ -3598,6 +3601,11 @@ THIS_PROCESS_IS_CAUSING_TOO_MANY_WAKEUPS__SENDING_EXC_RESOURCE(void) "supressed by a boot-arg\n", procname, pid); return; } + if (audio_active) { + printf("process %s[%d] caught causing excessive wakeups. EXC_RESOURCE " + "supressed due to audio playback\n", procname, pid); + return; + } printf("process %s[%d] caught causing excessive wakeups. Observed wakeups rate " "(per sec): %lld; Maximum permitted wakeups rate (per sec): %lld; Observation " "period: %lld seconds; Task lifetime number of wakeups: %lld\n", diff --git a/osfmk/kern/telemetry.c b/osfmk/kern/telemetry.c index 5419eb48d..724f8a2af 100644 --- a/osfmk/kern/telemetry.c +++ b/osfmk/kern/telemetry.c @@ -528,14 +528,11 @@ void telemetry_take_sample(thread_t thread, uint8_t microsnapshot_flags) tsnap->ss_flags |= kTaskRsrcFlagged; } - proc_get_darwinbgstate(task, &tmp); - - if (tmp & PROC_FLAG_DARWINBG) { + if (task->effective_policy.darwinbg == 1) { tsnap->ss_flags |= kTaskDarwinBG; } - if (tmp & PROC_FLAG_EXT_DARWINBG) { - tsnap->ss_flags |= kTaskExtDarwinBG; - } + + proc_get_darwinbgstate(task, &tmp); if (task->requested_policy.t_role == TASK_FOREGROUND_APPLICATION) { tsnap->ss_flags |= kTaskIsForeground; diff --git a/osfmk/kern/thread.c b/osfmk/kern/thread.c index a8811ca94..6f6fd6ff9 100644 --- a/osfmk/kern/thread.c +++ b/osfmk/kern/thread.c @@ -164,6 +164,7 @@ extern char * proc_name_address(void *p); #endif /* MACH_BSD */ extern int 
disable_exc_resource; +extern int audio_active; extern int debug_task; int thread_max = CONFIG_THREAD_MAX; /* Max number of threads */ int task_threadmax = CONFIG_THREAD_MAX; @@ -1584,6 +1585,11 @@ THIS_THREAD_IS_CONSUMING_TOO_MUCH_CPU__SENDING_EXC_RESOURCE(void) return; } + if (audio_active) { + printf("process %s[%d] thread %llu caught burning CPU!; EXC_RESOURCE " + "supressed due to audio playback\n", procname, pid, tid); + return; + } printf("process %s[%d] thread %llu caught burning CPU! " "It used more than %d%% CPU (Actual recent usage: %d%%) over %d seconds. " "thread lifetime cpu usage %d.%06d seconds, (%d.%06d user, %d.%06d system) " diff --git a/osfmk/kern/timer_call.c b/osfmk/kern/timer_call.c index a382c8607..0c75d7707 100644 --- a/osfmk/kern/timer_call.c +++ b/osfmk/kern/timer_call.c @@ -1275,10 +1275,12 @@ timer_longterm_init(void) DBG("timer_longterm_init() tlp: %p, queue: %p\n", tlp, &tlp->queue); /* - * Set the longterm timer threshold. - * Defaults to TIMER_LONGTERM_THRESHOLD; overridden longterm boot-arg + * Set the longterm timer threshold. Defaults to TIMER_LONGTERM_THRESHOLD + * or TIMER_LONGTERM_NONE (disabled) for server; + * overridden longterm boot-arg */ - tlp->threshold.interval = TIMER_LONGTERM_THRESHOLD; + tlp->threshold.interval = serverperfmode ? TIMER_LONGTERM_NONE + : TIMER_LONGTERM_THRESHOLD; if (PE_parse_boot_argn("longterm", &longterm, sizeof (longterm))) { tlp->threshold.interval = (longterm == 0) ? 
TIMER_LONGTERM_NONE : diff --git a/osfmk/mach/memory_object_types.h b/osfmk/mach/memory_object_types.h index afdb148bb..96abd8026 100644 --- a/osfmk/mach/memory_object_types.h +++ b/osfmk/mach/memory_object_types.h @@ -474,9 +474,10 @@ typedef uint32_t upl_size_t; /* page-aligned byte size */ #define UPL_REQUEST_SET_DIRTY 0x10000000 #define UPL_REQUEST_NO_FAULT 0x20000000 /* fail if pages not all resident */ #define UPL_NOZEROFILLIO 0x40000000 /* allow non zerofill pages present */ +#define UPL_REQUEST_FORCE_COHERENCY 0x80000000 /* UPL flags known by this kernel */ -#define UPL_VALID_FLAGS 0x7FFFFFFF +#define UPL_VALID_FLAGS 0xFFFFFFFF /* upl abort error flags */ @@ -580,6 +581,7 @@ typedef uint32_t upl_size_t; /* page-aligned byte size */ #define UPL_COMMIT_CLEAR_PRECIOUS 0x80 #define UPL_COMMIT_SPECULATE 0x100 #define UPL_COMMIT_FREE_ABSENT 0x200 +#define UPL_COMMIT_WRITTEN_BY_KERNEL 0x400 #define UPL_COMMIT_KERNEL_ONLY_FLAGS (UPL_COMMIT_CS_VALIDATED | UPL_COMMIT_FREE_ABSENT) diff --git a/osfmk/vm/bsd_vm.c b/osfmk/vm/bsd_vm.c index 6c155e60a..5d05fa984 100644 --- a/osfmk/vm/bsd_vm.c +++ b/osfmk/vm/bsd_vm.c @@ -715,10 +715,13 @@ vnode_pager_get_object_size( } kern_return_t -vnode_pager_get_object_pathname( +vnode_pager_get_object_name( memory_object_t mem_obj, char *pathname, - vm_size_t *length_p) + vm_size_t pathname_len, + char *filename, + vm_size_t filename_len, + boolean_t *truncated_path_p) { vnode_pager_t vnode_object; @@ -728,15 +731,19 @@ vnode_pager_get_object_pathname( vnode_object = vnode_pager_lookup(mem_obj); - return vnode_pager_get_pathname(vnode_object->vnode_handle, - pathname, - length_p); + return vnode_pager_get_name(vnode_object->vnode_handle, + pathname, + pathname_len, + filename, + filename_len, + truncated_path_p); } kern_return_t -vnode_pager_get_object_filename( - memory_object_t mem_obj, - const char **filename) +vnode_pager_get_object_mtime( + memory_object_t mem_obj, + struct timespec *mtime, + struct timespec *cs_mtime) { 
vnode_pager_t vnode_object; @@ -746,8 +753,9 @@ vnode_pager_get_object_filename( vnode_object = vnode_pager_lookup(mem_obj); - return vnode_pager_get_filename(vnode_object->vnode_handle, - filename); + return vnode_pager_get_mtime(vnode_object->vnode_handle, + mtime, + cs_mtime); } kern_return_t diff --git a/osfmk/vm/vm_apple_protect.c b/osfmk/vm/vm_apple_protect.c index c6da62e27..11ccb6716 100644 --- a/osfmk/vm/vm_apple_protect.c +++ b/osfmk/vm/vm_apple_protect.c @@ -637,7 +637,7 @@ apple_protect_pager_data_request( } else { boolean_t empty; upl_commit_range(upl, 0, upl->size, - UPL_COMMIT_CS_VALIDATED, + UPL_COMMIT_CS_VALIDATED | UPL_COMMIT_WRITTEN_BY_KERNEL, upl_pl, pl_count, &empty); } diff --git a/osfmk/vm/vm_fault.c b/osfmk/vm/vm_fault.c index 8a1b35d84..207a987ec 100644 --- a/osfmk/vm/vm_fault.c +++ b/osfmk/vm/vm_fault.c @@ -105,6 +105,8 @@ #include +#include /* for struct timespec */ + #define VM_FAULT_CLASSIFY 0 #define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */ @@ -1562,7 +1564,8 @@ vm_fault_page( */ pmap_sync_page_attributes_phys( m->phys_page); - } + } else + m->written_by_kernel = TRUE; break; case KERN_MEMORY_FAILURE: m->unusual = TRUE; @@ -2457,6 +2460,8 @@ vm_fault_page( * careful not to modify the VM object in any way that is not * legal under a shared lock... 
*/ +extern int proc_selfpid(void); +extern char *proc_name_address(void *p); unsigned long cs_enter_tainted_rejected = 0; unsigned long cs_enter_tainted_accepted = 0; kern_return_t @@ -2635,8 +2640,102 @@ vm_fault_enter(vm_page_t m, if (reject_page) { /* reject the tainted page: abort the page fault */ + int pid; + const char *procname; + task_t task; + vm_object_t file_object, shadow; + vm_object_offset_t file_offset; + char *pathname, *filename; + vm_size_t pathname_len, filename_len; + boolean_t truncated_path; +#define __PATH_MAX 1024 + struct timespec mtime, cs_mtime; + kr = KERN_CODESIGN_ERROR; cs_enter_tainted_rejected++; + + /* get process name and pid */ + procname = "?"; + task = current_task(); + pid = proc_selfpid(); + if (task->bsd_info != NULL) + procname = proc_name_address(task->bsd_info); + + /* get file's VM object */ + file_object = m->object; + file_offset = m->offset; + for (shadow = file_object->shadow; + shadow != VM_OBJECT_NULL; + shadow = file_object->shadow) { + vm_object_lock_shared(shadow); + if (file_object != m->object) { + vm_object_unlock(file_object); + } + file_offset += file_object->vo_shadow_offset; + file_object = shadow; + } + + mtime.tv_sec = 0; + mtime.tv_nsec = 0; + cs_mtime.tv_sec = 0; + cs_mtime.tv_nsec = 0; + + /* get file's pathname and/or filename */ + pathname = NULL; + filename = NULL; + pathname_len = 0; + filename_len = 0; + truncated_path = FALSE; + if (file_object->pager == NULL) { + /* no pager -> no file -> no pathname */ + pathname = (char *) ""; + } else { + pathname = (char *)kalloc(__PATH_MAX * 2); + if (pathname) { + pathname_len = __PATH_MAX; + filename = pathname + pathname_len; + filename_len = __PATH_MAX; + } + vnode_pager_get_object_name(file_object->pager, + pathname, + pathname_len, + filename, + filename_len, + &truncated_path); + vnode_pager_get_object_mtime(file_object->pager, + &mtime, + &cs_mtime); + } + printf("CODE SIGNING: process %d[%s]: " + "rejecting invalid page at address 0x%llx " + 
"from offset 0x%llx in file \"%s%s%s\" " + "(cs_mtime:%lu.%ld %s mtime:%lu.%ld) " + "(signed:%d validated:%d tainted:%d " + "wpmapped:%d slid:%d)\n", + pid, procname, (addr64_t) vaddr, + file_offset, + pathname, + (truncated_path ? "/.../" : ""), + (truncated_path ? filename : ""), + cs_mtime.tv_sec, cs_mtime.tv_nsec, + ((cs_mtime.tv_sec == mtime.tv_sec && + cs_mtime.tv_nsec == mtime.tv_nsec) + ? "==" + : "!="), + mtime.tv_sec, mtime.tv_nsec, + m->object->code_signed, + m->cs_validated, + m->cs_tainted, + m->wpmapped, + m->slid); + if (file_object != m->object) { + vm_object_unlock(file_object); + } + if (pathname_len != 0) { + kfree(pathname, __PATH_MAX * 2); + pathname = NULL; + filename = NULL; + } } else { /* proceed with the tainted page */ kr = KERN_SUCCESS; @@ -2647,12 +2746,14 @@ vm_fault_enter(vm_page_t m, m->cs_tainted = TRUE; cs_enter_tainted_accepted++; } - if (cs_debug || kr != KERN_SUCCESS) { - printf("CODESIGNING: vm_fault_enter(0x%llx): " - "page %p obj %p off 0x%llx *** INVALID PAGE ***\n", - (long long)vaddr, m, m->object, m->offset); + if (kr != KERN_SUCCESS) { + if (cs_debug) { + printf("CODESIGNING: vm_fault_enter(0x%llx): " + "page %p obj %p off 0x%llx *** INVALID PAGE ***\n", + (long long)vaddr, m, m->object, m->offset); + } #if !SECURE_KERNEL - if (kr != KERN_SUCCESS && cs_enforcement_panic) { + if (cs_enforcement_panic) { panic("CODESIGNING: panicking on invalid page\n"); } #endif @@ -5497,6 +5598,7 @@ vm_page_validate_cs_mapped( } } +extern int panic_on_cs_killed; void vm_page_validate_cs( vm_page_t page) @@ -5541,6 +5643,12 @@ vm_page_validate_cs( return; } + if (panic_on_cs_killed && + page->slid) { + panic("vm_page_validate_cs(%p): page is slid\n", page); + } + assert(!page->slid); + #if CHECK_CS_VALIDATION_BITMAP if ( vnode_pager_cs_check_validation_bitmap( page->object->pager, trunc_page(page->offset + page->object->paging_offset), CS_BITMAP_CHECK ) == KERN_SUCCESS) { page->cs_validated = TRUE; diff --git a/osfmk/vm/vm_options.h 
b/osfmk/vm/vm_options.h index 9128a12ac..abb7dddea 100644 --- a/osfmk/vm/vm_options.h +++ b/osfmk/vm/vm_options.h @@ -30,5 +30,11 @@ #define __VM_VM_OPTIONS_H__ #define UPL_DEBUG DEBUG +// #define VM_PIP_DEBUG + +#define VM_PAGE_BUCKETS_CHECK DEBUG +#if VM_PAGE_BUCKETS_CHECK +#define VM_PAGE_FAKE_BUCKETS 1 +#endif /* VM_PAGE_BUCKETS_CHECK */ #endif /* __VM_VM_OPTIONS_H__ */ diff --git a/osfmk/vm/vm_page.h b/osfmk/vm/vm_page.h index 0e2824f05..fbb9d2971 100644 --- a/osfmk/vm/vm_page.h +++ b/osfmk/vm/vm_page.h @@ -67,6 +67,7 @@ #define _VM_VM_PAGE_H_ #include +#include #include #include @@ -210,6 +211,8 @@ struct vm_page { /* boolean_t */ busy:1, /* page is in transit (O) */ wanted:1, /* someone is waiting for page (O) */ tabled:1, /* page is in VP table (O) */ + hashed:1, /* page is in vm_page_buckets[] + (O) + the bucket lock */ fictitious:1, /* Physical page doesn't exist (O) */ /* * IMPORTANT: the "pmapped" bit can be turned on while holding the @@ -248,7 +251,8 @@ struct vm_page { slid:1, was_dirty:1, /* was this page previously dirty? */ compressor:1, /* page owned by compressor pool */ - __unused_object_bits:7; /* 7 bits available here */ + written_by_kernel:1, /* page was written by kernel (i.e. decompressed) */ + __unused_object_bits:5; /* 5 bits available here */ #if __LP64__ unsigned int __unused_padding; /* Pad structure explicitly @@ -1000,5 +1004,8 @@ extern unsigned int vm_max_delayed_work_limit; extern vm_page_t vm_object_page_grab(vm_object_t); +#if VM_PAGE_BUCKETS_CHECK +extern void vm_page_buckets_check(void); +#endif /* VM_PAGE_BUCKETS_CHECK */ #endif /* _VM_VM_PAGE_H_ */ diff --git a/osfmk/vm/vm_pageout.c b/osfmk/vm/vm_pageout.c index 9e8d0384d..ad6388a93 100644 --- a/osfmk/vm/vm_pageout.c +++ b/osfmk/vm/vm_pageout.c @@ -996,6 +996,10 @@ unsigned int vm_memory_pressure = 0; #define VM_PAGEOUT_STAT_AFTER(i) \ (((i) == VM_PAGEOUT_STAT_SIZE - 1) ? 
0 : (i) + 1) +#if VM_PAGE_BUCKETS_CHECK +int vm_page_buckets_check_interval = 10; /* in seconds */ +#endif /* VM_PAGE_BUCKETS_CHECK */ + /* * Called from compute_averages(). */ @@ -1005,6 +1009,14 @@ compute_memory_pressure( { unsigned int vm_pageout_next; +#if VM_PAGE_BUCKETS_CHECK + /* check the consistency of VM page buckets at regular interval */ + static int counter = 0; + if ((++counter % vm_page_buckets_check_interval) == 0) { + vm_page_buckets_check(); + } +#endif /* VM_PAGE_BUCKETS_CHECK */ + vm_memory_pressure = vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].reclaimed; @@ -3726,6 +3738,11 @@ vm_pageout_garbage_collect(int collect) } +#if VM_PAGE_BUCKETS_CHECK +#if VM_PAGE_FAKE_BUCKETS +extern vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end; +#endif /* VM_PAGE_FAKE_BUCKETS */ +#endif /* VM_PAGE_BUCKETS_CHECK */ void vm_pageout(void) @@ -3867,6 +3884,18 @@ vm_pageout(void) if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) vm_compressor_pager_init(); +#if VM_PAGE_BUCKETS_CHECK +#if VM_PAGE_FAKE_BUCKETS + printf("**** DEBUG: protecting fake buckets [0x%llx:0x%llx]\n", + vm_page_fake_buckets_start, vm_page_fake_buckets_end); + pmap_protect(kernel_pmap, + vm_page_fake_buckets_start, + vm_page_fake_buckets_end, + VM_PROT_READ); +// *(char *) vm_page_fake_buckets_start = 'x'; /* panic! */ +#endif /* VM_PAGE_FAKE_BUCKETS */ +#endif /* VM_PAGE_BUCKETS_CHECK */ + vm_pageout_continue(); /* @@ -5609,7 +5638,7 @@ vm_map_remove_upl( return KERN_FAILURE; } - +extern int panic_on_cs_killed; kern_return_t upl_commit_range( upl_t upl, @@ -5801,6 +5830,9 @@ upl_commit_range( m->cs_validated = page_list[entry].cs_validated; m->cs_tainted = page_list[entry].cs_tainted; } + if (flags & UPL_COMMIT_WRITTEN_BY_KERNEL) + m->written_by_kernel = TRUE; + if (upl->flags & UPL_IO_WIRE) { if (page_list) @@ -5820,6 +5852,12 @@ upl_commit_range( * so it will need to be * re-validated. 
*/ + if (panic_on_cs_killed && + m->slid) { + panic("upl_commit_range(%p): page %p was slid\n", + upl, m); + } + assert(!m->slid); m->cs_validated = FALSE; #if DEVELOPMENT || DEBUG vm_cs_validated_resets++; @@ -5887,6 +5925,12 @@ upl_commit_range( * so it will need to be * re-validated. */ + if (panic_on_cs_killed && + m->slid) { + panic("upl_commit_range(%p): page %p was slid\n", + upl, m); + } + assert(!m->slid); m->cs_validated = FALSE; #if DEVELOPMENT || DEBUG vm_cs_validated_resets++; @@ -6033,6 +6077,7 @@ upl_commit_range( */ dwp->dw_mask |= DW_clear_busy; } + /* * Wakeup any thread waiting for the page to be un-cleaning. */ @@ -7098,6 +7143,11 @@ vm_object_iopl_request( if (!(cntrl_flags & UPL_COPYOUT_FROM)) { SET_PAGE_DIRTY(dst_page, TRUE); } + if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->written_by_kernel == TRUE) { + pmap_sync_page_attributes_phys(dst_page->phys_page); + dst_page->written_by_kernel = FALSE; + } + record_phys_addr: if (dst_page->busy) upl->flags |= UPL_HAS_BUSY; diff --git a/osfmk/vm/vm_protos.h b/osfmk/vm/vm_protos.h index 533b03ada..0033a2890 100644 --- a/osfmk/vm/vm_protos.h +++ b/osfmk/vm/vm_protos.h @@ -177,13 +177,18 @@ extern void vnode_pager_throttle( extern uint32_t vnode_pager_return_throttle_io_limit( struct vnode *, uint32_t *); -extern kern_return_t vnode_pager_get_pathname( +extern kern_return_t vnode_pager_get_name( struct vnode *vp, char *pathname, - vm_size_t *length_p); -extern kern_return_t vnode_pager_get_filename( + vm_size_t pathname_len, + char *filename, + vm_size_t filename_len, + boolean_t *truncated_path_p); +struct timespec; +extern kern_return_t vnode_pager_get_mtime( struct vnode *vp, - const char **filename); + struct timespec *mtime, + struct timespec *cs_mtime); extern kern_return_t vnode_pager_get_cs_blobs( struct vnode *vp, void **blobs); @@ -219,13 +224,17 @@ extern kern_return_t vnode_pager_get_isSSD( extern kern_return_t vnode_pager_get_throttle_io_limit( memory_object_t, uint32_t *); 
-extern kern_return_t vnode_pager_get_object_pathname( +extern kern_return_t vnode_pager_get_object_name( memory_object_t mem_obj, char *pathname, - vm_size_t *length_p); -extern kern_return_t vnode_pager_get_object_filename( + vm_size_t pathname_len, + char *filename, + vm_size_t filename_len, + boolean_t *truncated_path_p); +extern kern_return_t vnode_pager_get_object_mtime( memory_object_t mem_obj, - const char **filename); + struct timespec *mtime, + struct timespec *cs_mtime); extern kern_return_t vnode_pager_get_object_cs_blobs( memory_object_t mem_obj, void **blobs); diff --git a/osfmk/vm/vm_resident.c b/osfmk/vm/vm_resident.c index 2bb8118c2..a9fd4d0b2 100644 --- a/osfmk/vm/vm_resident.c +++ b/osfmk/vm/vm_resident.c @@ -165,6 +165,13 @@ unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */ lck_spin_t *vm_page_bucket_locks; +#if VM_PAGE_BUCKETS_CHECK +boolean_t vm_page_buckets_check_ready = FALSE; +#if VM_PAGE_FAKE_BUCKETS +vm_page_bucket_t *vm_page_fake_buckets; /* decoy buckets */ +vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end; +#endif /* VM_PAGE_FAKE_BUCKETS */ +#endif /* VM_PAGE_BUCKETS_CHECK */ #if MACH_PAGE_HASH_STATS /* This routine is only for debug. It is intended to be called by @@ -535,6 +542,7 @@ vm_page_bootstrap( m->busy = TRUE; m->wanted = FALSE; m->tabled = FALSE; + m->hashed = FALSE; m->fictitious = FALSE; m->pmapped = FALSE; m->wpmapped = FALSE; @@ -558,6 +566,7 @@ vm_page_bootstrap( m->was_dirty = FALSE; m->xpmapped = FALSE; m->compressor = FALSE; + m->written_by_kernel = FALSE; m->__unused_object_bits = 0; /* @@ -656,6 +665,30 @@ vm_page_bootstrap( if (vm_page_hash_mask & vm_page_bucket_count) printf("vm_page_bootstrap: WARNING -- strange page hash\n"); +#if VM_PAGE_BUCKETS_CHECK +#if VM_PAGE_FAKE_BUCKETS + /* + * Allocate a decoy set of page buckets, to detect + * any stomping there. 
+ */ + vm_page_fake_buckets = (vm_page_bucket_t *) + pmap_steal_memory(vm_page_bucket_count * + sizeof(vm_page_bucket_t)); + vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets; + vm_page_fake_buckets_end = + vm_map_round_page((vm_page_fake_buckets_start + + (vm_page_bucket_count * + sizeof (vm_page_bucket_t))), + PAGE_MASK); + char *cp; + for (cp = (char *)vm_page_fake_buckets_start; + cp < (char *)vm_page_fake_buckets_end; + cp++) { + *cp = 0x5a; + } +#endif /* VM_PAGE_FAKE_BUCKETS */ +#endif /* VM_PAGE_BUCKETS_CHECK */ + vm_page_buckets = (vm_page_bucket_t *) pmap_steal_memory(vm_page_bucket_count * sizeof(vm_page_bucket_t)); @@ -677,6 +710,10 @@ vm_page_bootstrap( for (i = 0; i < vm_page_bucket_lock_count; i++) lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr); +#if VM_PAGE_BUCKETS_CHECK + vm_page_buckets_check_ready = TRUE; +#endif /* VM_PAGE_BUCKETS_CHECK */ + /* * Machine-dependent code allocates the resident page table. * It uses vm_page_init to initialize the page frames. 
@@ -1045,7 +1082,7 @@ vm_page_insert_internal( #endif /* DEBUG */ if (insert_in_hash == TRUE) { -#if DEBUG +#if DEBUG || VM_PAGE_CHECK_BUCKETS if (mem->tabled || mem->object != VM_OBJECT_NULL) panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) " "already in (obj=%p,off=0x%llx)", @@ -1081,7 +1118,7 @@ vm_page_insert_internal( if (++bucket->cur_count > bucket->hi_count) bucket->hi_count = bucket->cur_count; #endif /* MACH_PAGE_HASH_STATS */ - + mem->hashed = TRUE; lck_spin_unlock(bucket_lock); } @@ -1184,7 +1221,7 @@ vm_page_replace( VM_PAGE_CHECK(mem); #endif vm_object_lock_assert_exclusive(object); -#if DEBUG +#if DEBUG || VM_PAGE_CHECK_BUCKETS if (mem->tabled || mem->object != VM_OBJECT_NULL) panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) " "already in (obj=%p,off=0x%llx)", @@ -1219,6 +1256,7 @@ vm_page_replace( * Remove old page from hash list */ *mp = m->next; + m->hashed = FALSE; found_m = m; break; @@ -1234,6 +1272,7 @@ vm_page_replace( * insert new page at head of hash list */ bucket->pages = mem; + mem->hashed = TRUE; lck_spin_unlock(bucket_lock); @@ -1309,7 +1348,7 @@ vm_page_remove( #if MACH_PAGE_HASH_STATS bucket->cur_count--; #endif /* MACH_PAGE_HASH_STATS */ - + mem->hashed = FALSE; lck_spin_unlock(bucket_lock); } /* @@ -4281,6 +4320,14 @@ vm_page_find_contiguous( m2->was_dirty = m1->was_dirty; m2->compressor = m1->compressor; + /* + * page may need to be flushed if + * it is marshalled into a UPL + * that is going to be used by a device + * that doesn't support coherency + */ + m2->written_by_kernel = TRUE; + /* * make sure we clear the ref/mod state * from the pmap layer... 
else we risk @@ -6007,7 +6054,7 @@ hibernate_hash_insert_page(vm_page_t mem) vm_page_bucket_t *bucket; int hash_id; - assert(mem->tabled); + assert(mem->hashed); assert(mem->object); assert(mem->offset != (vm_object_offset_t) -1); @@ -6091,7 +6138,7 @@ hibernate_rebuild_vm_structs(void) *tmem = *mem; mem = tmem; } - if (mem->tabled) + if (mem->hashed) hibernate_hash_insert_page(mem); /* * the 'hole' between this vm_page_t and the previous @@ -6108,7 +6155,7 @@ hibernate_rebuild_vm_structs(void) assert(vm_page_free_count == hibernate_teardown_vm_page_free_count); /* - * process the list of vm_page_t's that were tabled in the hash, + * process the list of vm_page_t's that were entered in the hash, * but were not located in the vm_pages arrary... these are * vm_page_t's that were created on the fly (i.e. fictitious) */ @@ -6161,8 +6208,7 @@ hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_l bucket = &vm_page_buckets[i]; for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem_next) { - - assert(mem->tabled); + assert(mem->hashed); mem_next = mem->next; @@ -6297,3 +6343,76 @@ vm_page_info( return vm_page_bucket_count; } #endif /* MACH_VM_DEBUG */ + +#if VM_PAGE_BUCKETS_CHECK +void +vm_page_buckets_check(void) +{ + unsigned int i; + vm_page_t p; + unsigned int p_hash; + vm_page_bucket_t *bucket; + lck_spin_t *bucket_lock; + + if (!vm_page_buckets_check_ready) { + return; + } + +#if HIBERNATION + if (hibernate_rebuild_needed || + hibernate_rebuild_hash_list) { + panic("BUCKET_CHECK: hibernation in progress: " + "rebuild_needed=%d rebuild_hash_list=%p\n", + hibernate_rebuild_needed, + hibernate_rebuild_hash_list); + } +#endif /* HIBERNATION */ + +#if VM_PAGE_FAKE_BUCKETS + char *cp; + for (cp = (char *) vm_page_fake_buckets_start; + cp < (char *) vm_page_fake_buckets_end; + cp++) { + if (*cp != 0x5a) { + panic("BUCKET_CHECK: corruption at %p in fake buckets " + "[0x%llx:0x%llx]\n", + cp, + vm_page_fake_buckets_start, + 
vm_page_fake_buckets_end); + } + } +#endif /* VM_PAGE_FAKE_BUCKETS */ + + for (i = 0; i < vm_page_bucket_count; i++) { + bucket = &vm_page_buckets[i]; + if (bucket->pages == VM_PAGE_NULL) { + continue; + } + + bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK]; + lck_spin_lock(bucket_lock); + p = bucket->pages; + while (p != VM_PAGE_NULL) { + if (!p->hashed) { + panic("BUCKET_CHECK: page %p (%p,0x%llx) " + "hash %d in bucket %d at %p " + "is not hashed\n", + p, p->object, p->offset, + p_hash, i, bucket); + } + p_hash = vm_page_hash(p->object, p->offset); + if (p_hash != i) { + panic("BUCKET_CHECK: corruption in bucket %d " + "at %p: page %p object %p offset 0x%llx " + "hash %d\n", + i, bucket, p, p->object, p->offset, + p_hash); + } + p = p->next; + } + lck_spin_unlock(bucket_lock); + } + +// printf("BUCKET_CHECK: checked buckets\n"); +} +#endif /* VM_PAGE_BUCKETS_CHECK */ diff --git a/osfmk/vm/vm_shared_region.c b/osfmk/vm/vm_shared_region.c index 832502e10..ec74a2810 100644 --- a/osfmk/vm/vm_shared_region.c +++ b/osfmk/vm/vm_shared_region.c @@ -972,12 +972,6 @@ vm_shared_region_undo_mappings( assert(kr2 == KERN_SUCCESS); } - /* - * This is how check_np() knows if the shared region - * is mapped. So clear it here. 
- */ - shared_region->sr_first_mapping = (mach_vm_offset_t) -1; - if (reset_shared_region_state) { vm_shared_region_lock(); assert(shared_region->sr_ref_count > 1); @@ -1010,7 +1004,9 @@ vm_shared_region_map_file( memory_object_control_t file_control, memory_object_size_t file_size, void *root_dir, - struct shared_file_mapping_np *mapping_to_slide) + uint32_t slide, + user_addr_t slide_start, + user_addr_t slide_size) { kern_return_t kr; vm_object_t file_object; @@ -1023,7 +1019,8 @@ vm_shared_region_map_file( vm_map_offset_t target_address; vm_object_t object; vm_object_size_t obj_size; - boolean_t found_mapping_to_slide = FALSE; + struct shared_file_mapping_np *mapping_to_slide = NULL; + mach_vm_offset_t first_mapping = (mach_vm_offset_t) -1; kr = KERN_SUCCESS; @@ -1099,7 +1096,7 @@ vm_shared_region_map_file( /* * This is the mapping that needs to be slid. */ - if (found_mapping_to_slide == TRUE) { + if (mapping_to_slide != NULL) { SHARED_REGION_TRACE_INFO( ("shared_region: mapping[%d]: " "address:0x%016llx size:0x%016llx " @@ -1113,11 +1110,7 @@ vm_shared_region_map_file( mappings[i].sfm_max_prot, mappings[i].sfm_init_prot)); } else { - if (mapping_to_slide != NULL) { - mapping_to_slide->sfm_file_offset = mappings[i].sfm_file_offset; - mapping_to_slide->sfm_size = mappings[i].sfm_size; - found_mapping_to_slide = TRUE; - } + mapping_to_slide = &mappings[i]; } } @@ -1170,7 +1163,17 @@ vm_shared_region_map_file( VM_INHERIT_DEFAULT); } - if (kr != KERN_SUCCESS) { + if (kr == KERN_SUCCESS) { + /* + * Record the first (chronologically) successful + * mapping in this shared region. + * We're protected by "sr_mapping_in_progress" here, + * so no need to lock "shared_region". 
+ */ + if (first_mapping == (mach_vm_offset_t) -1) { + first_mapping = target_address; + } + } else { if (map_port == MACH_PORT_NULL) { /* * Get rid of the VM object we just created @@ -1224,20 +1227,41 @@ vm_shared_region_map_file( } - /* - * Record the first (chronologically) mapping in - * this shared region. - * We're protected by "sr_mapping_in_progress" here, - * so no need to lock "shared_region". - */ - if (shared_region->sr_first_mapping == (mach_vm_offset_t) -1) { - shared_region->sr_first_mapping = target_address; + } + + if (kr == KERN_SUCCESS && + slide && + mapping_to_slide != NULL) { + kr = vm_shared_region_slide(slide, + mapping_to_slide->sfm_file_offset, + mapping_to_slide->sfm_size, + slide_start, + slide_size, + file_control); + if (kr != KERN_SUCCESS) { + SHARED_REGION_TRACE_ERROR( + ("shared_region: region_slide(" + "slide:0x%x start:0x%016llx " + "size:0x%016llx) failed 0x%x\n", + slide, + (long long)slide_start, + (long long)slide_size, + kr)); + vm_shared_region_undo_mappings(NULL, + 0, + mappings, + mappings_count); } } vm_shared_region_lock(); assert(shared_region->sr_ref_count > 1); assert(shared_region->sr_mapping_in_progress); + /* set "sr_first_mapping"; dyld uses it to validate the shared cache */ + if (kr == KERN_SUCCESS && + shared_region->sr_first_mapping == (mach_vm_offset_t) -1) { + shared_region->sr_first_mapping = first_mapping; + } /* we're done working on that shared region */ shared_region->sr_mapping_in_progress = FALSE; thread_wakeup((event_t) &shared_region->sr_mapping_in_progress); diff --git a/osfmk/vm/vm_shared_region.h b/osfmk/vm/vm_shared_region.h index cd2caf182..67a9257d0 100644 --- a/osfmk/vm/vm_shared_region.h +++ b/osfmk/vm/vm_shared_region.h @@ -195,7 +195,9 @@ extern kern_return_t vm_shared_region_map_file( memory_object_control_t file_control, memory_object_size_t file_size, void *root_dir, - struct shared_file_mapping_np *mapping_to_slide); + uint32_t slide, + user_addr_t slide_start, + user_addr_t 
slide_size); extern kern_return_t vm_shared_region_sliding_valid(uint32_t slide); extern kern_return_t vm_shared_region_slide_sanity_check(vm_shared_region_t sr); extern kern_return_t vm_shared_region_slide_init(vm_shared_region_t sr, diff --git a/tools/lldbmacros/memory.py b/tools/lldbmacros/memory.py index d3cbeb3e3..284472709 100644 --- a/tools/lldbmacros/memory.py +++ b/tools/lldbmacros/memory.py @@ -1835,6 +1835,117 @@ def GetRWLEntry(rwlg): rwlg.lck_grp_stat.lck_grp_rw_stat.lck_grp_rw_wait_cnt, rwlg.lck_grp_name) return out_string +#Macro: showlock +@lldb_type_summary(['lck_mtx_t *']) +@header("===== Mutex Lock Summary =====") +def GetMutexLockSummary(mtx): + """ Summarize mutex lock with important information. + params: + mtx: value - obj representing a mutex lock in kernel + returns: + out_str - summary of the mutex lock + """ + if not mtx: + return "Invalid lock value: 0x0" + + if kern.arch == "x86_64": + out_str = "Lock Type\t\t: MUTEX\n" + mtxd = mtx.lck_mtx_sw.lck_mtxd + out_str += "Owner Thread\t\t: {:#x}\n".format(mtxd.lck_mtxd_owner) + cmd_str = "p/d ((lck_mtx_t*){:#x})->lck_mtx_sw.lck_mtxd.".format(mtx) + cmd_out = lldb_run_command(cmd_str + "lck_mtxd_waiters") + out_str += "Number of Waiters\t: {:s}\n".format(cmd_out.split()[-1]) + cmd_out = lldb_run_command(cmd_str + "lck_mtxd_ilocked") + out_str += "ILocked\t\t\t: {:s}\n".format(cmd_out.split()[-1]) + cmd_out = lldb_run_command(cmd_str + "lck_mtxd_mlocked") + out_str += "MLocked\t\t\t: {:s}\n".format(cmd_out.split()[-1]) + cmd_out = lldb_run_command(cmd_str + "lck_mtxd_promoted") + out_str += "Promoted\t\t: {:s}\n".format(cmd_out.split()[-1]) + cmd_out = lldb_run_command(cmd_str + "lck_mtxd_spin") + out_str += "Spin\t\t\t: {:s}\n".format(cmd_out.split()[-1]) + return out_str + + out_str = "Lock Type\t\t: MUTEX\n" + out_str += "Owner Thread\t\t: {:#x}\n".format(mtx.lck_mtx_hdr.lck_mtxd_data & ~0x3) + out_str += "Number of Waiters\t: {:d}\n".format(mtx.lck_mtx_sw.lck_mtxd.lck_mtxd_waiters) + 
out_str += "Flags\t\t\t: " + if mtx.lck_mtx_hdr.lck_mtxd_data & 0x1: + out_str += "[Interlock Locked] " + if mtx.lck_mtx_hdr.lck_mtxd_data & 0x2: + out_str += "[Wait Flag]" + if (mtx.lck_mtx_hdr.lck_mtxd_data & 0x3) == 0: + out_str += "None" + return out_str + +@lldb_type_summary(['lck_spin_t *']) +@header("===== SpinLock Summary =====") +def GetSpinLockSummary(spinlock): + """ Summarize spinlock with important information. + params: + spinlock: value - obj representing a spinlock in kernel + returns: + out_str - summary of the spinlock + """ + if not spinlock: + return "Invalid lock value: 0x0" + + out_str = "Lock Type\t\t: SPINLOCK\n" + if kern.arch == "x86_64": + out_str += "Interlock\t\t: {:#x}\n".format(spinlock.interlock) + return out_str + + out_str += "Owner Thread\t\t: {:#x}\n".format(spinlock.lck_spin_data & ~0x3) + out_str += "Flags\t\t\t: " + if spinlock.lck_spin_data & 0x1: + out_str += "[Interlock Locked] " + if spinlock.lck_spin_data & 0x2: + out_str += "[Wait Flag]" + if (spinlock.lck_spin_data & 0x3) == 0: + out_str += "None" + return out_str + +@lldb_command('showlock', 'MS') +def ShowLock(cmd_args=None, cmd_options={}): + """ Show info about a lock - its state and owner thread details + Usage: showlock
+ -M : to consider as lck_mtx_t + -S : to consider as lck_spin_t + """ + if not cmd_args: + raise ArgumentError("Please specify the address of the lock whose info you want to view.") + return + + summary_str = "" + lock = kern.GetValueFromAddress(cmd_args[0], 'uintptr_t*') + + if kern.arch == "x86_64" and lock: + if "-M" in cmd_options: + lock_mtx = Cast(lock, 'lck_mtx_t *') + summary_str = GetMutexLockSummary(lock_mtx) + elif "-S" in cmd_options: + lock_spin = Cast(lock, 'lck_spin_t *') + summary_str = GetSpinLockSummary(lock_spin) + else: + summary_str = "Please specify supported lock option(-M/-S)" + + print summary_str + return + + if lock: + lock_mtx = Cast(lock, 'lck_mtx_t*') + if lock_mtx.lck_mtx_type == 0x22: + summary_str = GetMutexLockSummary(lock_mtx) + + lock_spin = Cast(lock, 'lck_spin_t*') + if lock_spin.lck_spin_type == 0x11: + summary_str = GetSpinLockSummary(lock_spin) + + if summary_str == "": + summary_str = "Lock Type\t\t: INVALID LOCK" + print summary_str + +#EndMacro: showlock + @lldb_command('showallrwlck') def ShowAllRWLck(cmd_args=None): """ Routine to print a summary listing of all read/writer locks