From 2b43307896d33ac5669cf172b186bb117c8bbe93 Mon Sep 17 00:00:00 2001 From: Darwin Date: Mon, 5 Aug 2013 16:46:03 +0000 Subject: [PATCH] xnu-2050.24.15 Imported from https://opensource.apple.com/tarballs/xnu/xnu-2050.24.15.tar.gz --- bsd/hfs/hfs_vnops.c | 24 +++++- bsd/kern/kern_event.c | 18 ++++ bsd/kern/kern_exit.c | 106 ++++++++++++++---------- bsd/kern/kern_fork.c | 7 ++ bsd/kern/policy_check.c | 2 +- bsd/kern/trace.codes | 97 +++++++++++----------- bsd/net/if_vlan.c | 10 ++- bsd/net/pf.c | 48 +++++------ bsd/netinet/tcp_ledbat.c | 10 +-- bsd/nfs/nfs_node.c | 123 +++++++++++++++++++++++++++- bsd/sys/cprotect.h | 3 - bsd/sys/vnode_internal.h | 1 + bsd/vfs/vfs_lookup.c | 14 +++- bsd/vfs/vfs_subr.c | 18 +++- bsd/vfs/vfs_syscalls.c | 5 +- bsd/vfs/vfs_vnops.c | 5 +- bsd/vfs/vfs_xattr.c | 48 +++++++++++ config/MasterVersion | 2 +- iokit/IOKit/IOService.h | 1 + iokit/IOKit/pwr_mgt/IOPMlog.h | 107 ++++++++++++------------ iokit/Kernel/IOPMrootDomain.cpp | 9 +- iokit/Kernel/IOPlatformExpert.cpp | 36 +------- iokit/Kernel/IOServicePM.cpp | 63 +++++++++++++- iokit/Kernel/IOServicePMPrivate.h | 4 + iokit/Kernel/IOTimerEventSource.cpp | 10 +++ iokit/Kernel/IOWorkLoop.cpp | 2 + osfmk/device/iokit_rpc.c | 2 + osfmk/i386/Diagnostics.c | 115 ++++++++++++++++++++++++-- osfmk/i386/Diagnostics.h | 4 +- osfmk/i386/cpu_data.h | 10 +++ osfmk/i386/cpu_topology.h | 1 + osfmk/i386/fpu.c | 2 +- osfmk/i386/machine_routines.c | 9 ++ osfmk/i386/machine_routines.h | 1 + osfmk/i386/pal_routines.h | 9 +- osfmk/i386/pmCPU.c | 54 +++++++++++- osfmk/i386/proc_reg.h | 16 +++- osfmk/kern/host.c | 20 +++++ osfmk/kern/host.h | 11 +++ osfmk/kern/machine.h | 2 + osfmk/kern/processor_data.h | 1 + osfmk/kern/sched_prim.c | 60 ++++++++++++++ osfmk/kern/task.c | 89 +++++++++++++++++++- osfmk/kern/task.h | 6 ++ osfmk/kern/thread.c | 11 +++ osfmk/kern/thread.h | 27 ++++++ osfmk/kern/thread_call.c | 16 +++- osfmk/kern/thread_call.h | 2 + osfmk/kern/timer_call.c | 40 +++++++-- osfmk/kern/timer_call.h | 1 + 
osfmk/mach/host_info.h | 2 +- osfmk/mach/machine/sdt.h | 9 ++ osfmk/mach/task_info.h | 14 ++++ security/mac_framework.h | 1 + security/mac_policy.h | 19 ++++- security/mac_vfs.c | 13 +++ 56 files changed, 1074 insertions(+), 266 deletions(-) diff --git a/bsd/hfs/hfs_vnops.c b/bsd/hfs/hfs_vnops.c index 1d0497a0f..fad99d0a4 100644 --- a/bsd/hfs/hfs_vnops.c +++ b/bsd/hfs/hfs_vnops.c @@ -5630,11 +5630,29 @@ hfs_vgetrsrc(struct hfsmount *hfsmp, struct vnode *vp, struct vnode **rvpp, * do a lookup on /tmp/b, you'd acquire an entirely different record's resource * fork. * - * As a result, we use the fileid, which should be invariant for the lifetime - * of the cnode (possibly barring calls to exchangedata). + * As a result, we use the fileid, which should be invariant for the lifetime + * of the cnode (possibly barring calls to exchangedata). + * + * Addendum: We can't do the above for HFS standard since we aren't guaranteed to + * have thread records for files. They were only required for directories. So + * we need to do the lookup with the catalog name. This is OK since hardlinks were + * never allowed on HFS standard. */ - error = cat_idlookup (hfsmp, cp->c_attr.ca_fileid, 0, 1, NULL, NULL, &rsrcfork); + if (hfsmp->hfs_flags & HFS_STANDARD) { + /* + * HFS standard only: + * + * Get the resource fork for this item via catalog lookup + * since HFS standard was case-insensitive only. We don't want the + * descriptor; just the fork data here. 
+ */ + error = cat_lookup (hfsmp, descptr, 1, (struct cat_desc*)NULL, + (struct cat_attr*)NULL, &rsrcfork, NULL); + } + else { + error = cat_idlookup (hfsmp, cp->c_fileid, 0, 1, NULL, NULL, &rsrcfork); + } hfs_systemfile_unlock(hfsmp, lockflags); if (error) { diff --git a/bsd/kern/kern_event.c b/bsd/kern/kern_event.c index ba269074a..0e2705e5a 100644 --- a/bsd/kern/kern_event.c +++ b/bsd/kern/kern_event.c @@ -548,6 +548,24 @@ filt_proc(struct knote *kn, long hint) */ event = (u_int)hint & NOTE_PCTRLMASK; + /* + * termination lifecycle events can happen while a debugger + * has reparented a process, in which case notifications + * should be quashed except to the tracing parent. When + * the debugger reaps the child (either via wait4(2) or + * process exit), the child will be reparented to the original + * parent and these knotes re-fired. + */ + if (event & NOTE_EXIT) { + if ((kn->kn_ptr.p_proc->p_oppid != 0) + && (kn->kn_kq->kq_p->p_pid != kn->kn_ptr.p_proc->p_ppid)) { + /* + * This knote is not for the current ptrace(2) parent, ignore. + */ + return 0; + } + } + /* * if the user is interested in this event, record it. 
*/ diff --git a/bsd/kern/kern_exit.c b/bsd/kern/kern_exit.c index e585c0d27..fa45facf3 100644 --- a/bsd/kern/kern_exit.c +++ b/bsd/kern/kern_exit.c @@ -153,7 +153,7 @@ void vfork_exit(proc_t p, int rv); void vproc_exit(proc_t p); __private_extern__ void munge_user64_rusage(struct rusage *a_rusage_p, struct user64_rusage *a_user_rusage_p); __private_extern__ void munge_user32_rusage(struct rusage *a_rusage_p, struct user32_rusage *a_user_rusage_p); -static int reap_child_locked(proc_t parent, proc_t child, int deadparent, int locked, int droplock); +static int reap_child_locked(proc_t parent, proc_t child, int deadparent, int reparentedtoinit, int locked, int droplock); /* * Things which should have prototypes in headers, but don't @@ -652,6 +652,7 @@ proc_exit(proc_t p) /* wait till parentrefs are dropped and grant no more */ proc_childdrainstart(p); while ((q = p->p_children.lh_first) != NULL) { + int reparentedtoinit = (q->p_listflag & P_LIST_DEADPARENT) ? 1 : 0; q->p_listflag |= P_LIST_DEADPARENT; if (q->p_stat == SZOMB) { if (p != q->p_pptr) @@ -670,7 +671,7 @@ proc_exit(proc_t p) * and the proc struct cannot be used for wakeups as well. 
* It is safe to use q here as this is system reap */ - (void)reap_child_locked(p, q, 1, 1, 0); + (void)reap_child_locked(p, q, 1, reparentedtoinit, 1, 0); } else { proc_reparentlocked(q, initproc, 0, 1); /* @@ -794,10 +795,6 @@ proc_exit(proc_t p) set_bsdtask_info(task, NULL); knote_hint = NOTE_EXIT | (p->p_xstat & 0xffff); - if (p->p_oppid != 0) { - knote_hint |= NOTE_EXIT_REPARENTED; - } - proc_knote(p, knote_hint); /* mark the thread as the one that is doing proc_exit @@ -836,7 +833,7 @@ proc_exit(proc_t p) p->p_listflag |= P_LIST_DEADPARENT; proc_list_unlock(); } - if ((p->p_listflag & P_LIST_DEADPARENT) == 0) { + if ((p->p_listflag & P_LIST_DEADPARENT) == 0 || p->p_oppid) { if (pp != initproc) { proc_lock(pp); pp->si_pid = p->p_pid; @@ -899,7 +896,7 @@ proc_exit(proc_t p) * and the proc struct cannot be used for wakeups as well. * It is safe to use p here as this is system reap */ - (void)reap_child_locked(pp, p, 1, 1, 1); + (void)reap_child_locked(pp, p, 1, 0, 1, 1); /* list lock dropped by reap_child_locked */ } if (uth->uu_lowpri_window) { @@ -934,7 +931,7 @@ proc_exit(proc_t p) * 1 Process was reaped */ static int -reap_child_locked(proc_t parent, proc_t child, int deadparent, int locked, int droplock) +reap_child_locked(proc_t parent, proc_t child, int deadparent, int reparentedtoinit, int locked, int droplock) { proc_t trace_parent = PROC_NULL; /* Traced parent process, if tracing */ @@ -949,41 +946,62 @@ reap_child_locked(proc_t parent, proc_t child, int deadparent, int locked, int d * ptraced can simply be reaped, refer to radar 5677288 * p_oppid -> ptraced * trace_parent == initproc -> away from launchd - * P_LIST_DEADPARENT -> came to launchd by reparenting + * reparentedtoinit -> came to launchd by reparenting */ - if (child->p_oppid && (trace_parent = proc_find(child->p_oppid)) - && !((trace_parent == initproc) && (child->p_lflag & P_LIST_DEADPARENT))) { + if (child->p_oppid) { + int knote_hint; + pid_t oppid; + proc_lock(child); + oppid = 
child->p_oppid; child->p_oppid = 0; + knote_hint = NOTE_EXIT | (child->p_xstat & 0xffff); proc_unlock(child); - if (trace_parent != initproc) { - /* - * proc internal fileds and p_ucred usage safe - * here as child is dead and is not reaped or - * reparented yet - */ - proc_lock(trace_parent); - trace_parent->si_pid = child->p_pid; - trace_parent->si_status = child->p_xstat; - trace_parent->si_code = CLD_CONTINUED; - trace_parent->si_uid = kauth_cred_getruid(child->p_ucred); - proc_unlock(trace_parent); - } - proc_reparentlocked(child, trace_parent, 1, 0); - psignal(trace_parent, SIGCHLD); - proc_list_lock(); - wakeup((caddr_t)trace_parent); - child->p_listflag &= ~P_LIST_WAITING; - wakeup(&child->p_stat); - proc_list_unlock(); - proc_rele(trace_parent); - if ((locked == 1) && (droplock == 0)) + + if ((trace_parent = proc_find(oppid)) + && !((trace_parent == initproc) && reparentedtoinit)) { + + if (trace_parent != initproc) { + /* + * proc internal fileds and p_ucred usage safe + * here as child is dead and is not reaped or + * reparented yet + */ + proc_lock(trace_parent); + trace_parent->si_pid = child->p_pid; + trace_parent->si_status = child->p_xstat; + trace_parent->si_code = CLD_CONTINUED; + trace_parent->si_uid = kauth_cred_getruid(child->p_ucred); + proc_unlock(trace_parent); + } + proc_reparentlocked(child, trace_parent, 1, 0); + + /* resend knote to original parent (and others) after reparenting */ + proc_knote(child, knote_hint); + + psignal(trace_parent, SIGCHLD); proc_list_lock(); - return (0); - } - - if (trace_parent != PROC_NULL) { - proc_rele(trace_parent); + wakeup((caddr_t)trace_parent); + child->p_listflag &= ~P_LIST_WAITING; + wakeup(&child->p_stat); + proc_list_unlock(); + proc_rele(trace_parent); + if ((locked == 1) && (droplock == 0)) + proc_list_lock(); + return (0); + } + + /* + * If we can't reparent (e.g. 
the original parent exited while child was being debugged, or + * original parent is the same as the debugger currently exiting), we still need to satisfy + * the knote lifecycle for other observers on the system. While the debugger was attached, + * the NOTE_EXIT would not have been broadcast during initial child termination. + */ + proc_knote(child, knote_hint); + + if (trace_parent != PROC_NULL) { + proc_rele(trace_parent); + } } proc_knote(child, NOTE_REAP); @@ -1148,6 +1166,8 @@ wait4_nocancel(proc_t q, struct wait4_nocancel_args *uap, int32_t *retval) if (p->p_stat == SZOMB) { + int reparentedtoinit = (p->p_listflag & P_LIST_DEADPARENT) ? 1 : 0; + proc_list_unlock(); #if CONFIG_MACF if ((error = mac_proc_check_wait(q, p)) != 0) @@ -1208,7 +1228,7 @@ wait4_nocancel(proc_t q, struct wait4_nocancel_args *uap, int32_t *retval) } /* Clean up */ - (void)reap_child_locked(q, p, 0, 0, 0); + (void)reap_child_locked(q, p, 0, reparentedtoinit, 0, 0); return (0); } @@ -1410,7 +1430,7 @@ waitid_nocancel(proc_t q, struct waitid_nocancel_args *uap, /* Prevent other process for waiting for this event? */ if (!(uap->options & WNOWAIT)) { - (void) reap_child_locked(q, p, 0, 0, 0); + (void) reap_child_locked(q, p, 0, 0, 0, 0); return (0); } goto out; @@ -1789,7 +1809,7 @@ vproc_exit(proc_t p) * and the proc struct cannot be used for wakeups as well. * It is safe to use q here as this is system reap */ - (void)reap_child_locked(p, q, 1, 1, 0); + (void)reap_child_locked(p, q, 1, 0, 1, 0); } else { proc_reparentlocked(q, initproc, 0, 1); /* @@ -1967,7 +1987,7 @@ vproc_exit(proc_t p) * and the proc struct cannot be used for wakeups as well. 
* It is safe to use p here as this is system reap */ - (void)reap_child_locked(pp, p, 0, 1, 1); + (void)reap_child_locked(pp, p, 0, 0, 1, 1); /* list lock dropped by reap_child_locked */ } proc_rele(pp); diff --git a/bsd/kern/kern_fork.c b/bsd/kern/kern_fork.c index 37d02887f..2f09ba8ee 100644 --- a/bsd/kern/kern_fork.c +++ b/bsd/kern/kern_fork.c @@ -109,6 +109,7 @@ extern void dtrace_lazy_dofs_duplicate(proc_t, proc_t); #include #include #include +#include #include #include @@ -812,6 +813,12 @@ fork_create_child(task_t parent_task, proc_t child_proc, int inherit_memory, int task_deallocate(child_task); child_task = NULL; } + + /* + * Tag thread as being the first thread in its task. + */ + thread_set_tag(child_thread, THREAD_TAG_MAINTHREAD); + bad: thread_yield_internal(1); diff --git a/bsd/kern/policy_check.c b/bsd/kern/policy_check.c index 09c792833..270a1bbc2 100644 --- a/bsd/kern/policy_check.c +++ b/bsd/kern/policy_check.c @@ -463,7 +463,7 @@ static struct mac_policy_ops policy_ops = { CHECK_SET_VOID_HOOK(thread_label_init) CHECK_SET_VOID_HOOK(thread_label_destroy) .mpo_reserved18 = common_void_hook, - .mpo_reserved19 = common_void_hook, + CHECK_SET_VOID_HOOK(vnode_notify_open) .mpo_reserved20 = common_void_hook, .mpo_reserved21 = common_void_hook, .mpo_reserved22 = common_void_hook, diff --git a/bsd/kern/trace.codes b/bsd/kern/trace.codes index 5ccd2bbff..af5a27b19 100644 --- a/bsd/kern/trace.codes +++ b/bsd/kern/trace.codes @@ -1044,6 +1044,54 @@ 0x5040010 CQ_psema 0x5040014 CQ_plock 0x5040018 CG_action +0x5070004 PM_SetParent +0x5070008 PM_AddChild +0x507000c PM_RemoveChild +0x5070010 PM_CtrlDriver +0x5070014 PM_CtrlDriverErr1 +0x5070018 PM_CtrlDriverErr2 +0x507001c PM_CtrlDriverErr3 +0x5070020 PM_CtrlDriverErr4 +0x5070024 PM_InterestDriver +0x5070028 PM_InterestDriverAckErr1 +0x507002c PM_ChildAck +0x5070030 PM_InterestDriverAck +0x5070034 PM_InterestDriverAckErr2 +0x5070038 PM_InterestDriverAckErr3 +0x507003c PM_CtrlDriverAckErr4 +0x5070040 
PM_CtrlDriverAck +0x5070044 PM_DomainWillChange +0x5070048 PM_DomainDidChange +0x507004c PM_RequestDomainState +0x5070050 PM_MakeUsable +0x5070054 PM_ChangeStateTo +0x5070058 PM_ChangeStateToPriv +0x507005c PM_SetAggressiveness +0x5070060 PM_CriticalTemp +0x5070064 PM_OverrideOn +0x5070068 PM_OverrideOff +0x5070074 PM_ChangeDone +0x5070078 PM_CtrlDriverTardy +0x507007c PM_InterestDriverTardy +0x5070080 PM_StartAckTimer +0x5070084 PM_StartParentChange +0x5070088 PM_AmendParentChange +0x507008c PM_StartDeviceChange +0x5070090 PM_RequestDenied +0x5070094 PM_CtrlDriverErr5 +0x5070098 PM_ProgramHardware +0x507009c PM_InformWillChange +0x50700a0 PM_InformDidChange +0x50700a4 PM_RemoveDriver +0x50700a8 PM_SetIdleTimer +0x50700ac PM_SystemWake +0x50700b4 PM_ClientAck +0x50700b8 PM_ClientTardy +0x50700bc PM_ClientCancel +0x50700c0 PM_ClientNotify +0x50700c4 PM_AppNotify +0x50700d4 PM_IdleCancel +0x50700d8 PM_SystemTracePoint 0x5080004 IOSERVICE_BUSY 0x5080008 IOSERVICE_NONBUSY 0x508000c IOSERVICE_MODULESTALL @@ -1064,55 +1112,6 @@ 0x5080048 IOSERVICE_KEXTD_ALIVE 0x508004C IOSERVICE_KEXTD_READY 0x5080050 IOSERVICE_REGISTRY_QUIET -0x5100004 PM_SetParent -0x5100008 PM_AddChild -0x510000c PM_RemoveChild -0x5100010 PM_CtrlDriver -0x5100014 PM_CtrlDrvrE1 -0x5100018 PM_CtrlDrvrE2 -0x510001c PM_CtrlDrvrE3 -0x5100020 PM_CtrlDrvrE4 -0x5100024 PM_IntDriver -0x5100028 PM_AckE1 -0x510002c PM_ChildAck -0x5100030 PM_DriverAck -0x5100034 PM_AckE2 -0x5100038 PM_AckE3 -0x510003c PM_AckE4 -0x5100040 PM_DrvrAckSPwr -0x5100044 PM_WillChange -0x5100048 PM_DidChange -0x510004c PM_ReqstDomain -0x5100050 PM_MakeUsable -0x5100054 PM_ChangeTo -0x5100058 PM_ChngeToPriv -0x510005c PM_SetAggrssvs -0x5100060 PM_CritclTemp -0x5100064 PM_OverrideOn -0x5100068 PM_OverrideOff -0x510006c PM_EnqueueErr -0x5100070 PM_CollapseQ -0x5100074 PM_ChangeDone -0x5100078 PM_CtrlDrvTrdy -0x510007c PM_IntDrvrTrdy -0x5100080 PM_StartAckTmr -0x5100084 PM_ParentChnge -0x5100088 PM_AmndPrnChng -0x510008c PM_DeviceChnge 
-0x5100090 PM_ReqDenied -0x5100094 PM_CtrlDrvrE45 -0x5100098 PM_PrgrmHrdwre -0x510009c PM_InfDrvrPre -0x51000a0 PM_InfDrvrPost -0x51000a4 PM_RemoveDrivr -0x51000a8 PM_IdlTimerPrd -0x51000ac PM_SystemWake -0x51000b0 PM_AckE5 -0x51000b4 PM_ClientAck -0x51000b8 PM_ClientTardy -0x51000bc PM_ClientCancl -0x51000c0 PM_ClientNotfy -0x51000c4 PM_AppNotify 0x5230000 HID_Unexpected 0x5230004 HID_KeyboardLEDThreadTrigger 0x5230008 HID_KeyboardLEDThreadActive diff --git a/bsd/net/if_vlan.c b/bsd/net/if_vlan.c index c1a5f0a20..273feea93 100644 --- a/bsd/net/if_vlan.c +++ b/bsd/net/if_vlan.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2012 Apple Inc. All rights reserved. + * Copyright (c) 2003-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -1401,9 +1401,6 @@ vlan_config(struct ifnet * ifp, struct ifnet * p, int tag) vlan_lock(); goto signal_done; } - /* mark the parent interface up */ - ifnet_set_flags(p, IFF_UP, IFF_UP); - (void)ifnet_ioctl(p, 0, SIOCSIFFLAGS, (caddr_t)NULL); } /* configure parent to receive our multicast addresses */ @@ -1462,6 +1459,11 @@ vlan_config(struct ifnet * ifp, struct ifnet * p, int tag) if (ifv != NULL) { ifvlan_release(ifv); } + if (first_vlan) { + /* mark the parent interface up */ + ifnet_set_flags(p, IFF_UP, IFF_UP); + (void)ifnet_ioctl(p, 0, SIOCSIFFLAGS, (caddr_t)NULL); + } return 0; signal_done: diff --git a/bsd/net/pf.c b/bsd/net/pf.c index b13db985e..50fc5bd03 100644 --- a/bsd/net/pf.c +++ b/bsd/net/pf.c @@ -5105,7 +5105,6 @@ pf_test_dummynet(struct pf_rule **rm, int direction, struct pfi_kif *kif, int asd = 0; int match = 0; u_int8_t icmptype = 0, icmpcode = 0; - union pf_state_xport nxport, sxport, dxport; struct ip_fw_args dnflow; struct pf_rule *prev_matching_rule = fwa ? fwa->fwa_pf_rule : NULL; int found_prev_rule = (prev_matching_rule) ? 
0 : 1; @@ -5115,39 +5114,31 @@ pf_test_dummynet(struct pf_rule **rm, int direction, struct pfi_kif *kif, if (!DUMMYNET_LOADED) return (PF_PASS); - if (TAILQ_EMPTY(pf_main_ruleset.rules[PF_RULESET_DUMMYNET].active.ptr)) { + if (TAILQ_EMPTY(pf_main_ruleset.rules[PF_RULESET_DUMMYNET].active.ptr)) return (PF_PASS); - } + bzero(&dnflow, sizeof(dnflow)); hdrlen = 0; - sxport.spi = 0; - dxport.spi = 0; - nxport.spi = 0; /* Fragments don't gave protocol headers */ if (!(pd->flags & PFDESC_IP_FRAG)) switch (pd->proto) { case IPPROTO_TCP: dnflow.fwa_id.flags = pd->hdr.tcp->th_flags; - dnflow.fwa_id.dst_port = pd->hdr.tcp->th_dport; - dnflow.fwa_id.src_port = pd->hdr.tcp->th_sport; - sxport.port = pd->hdr.tcp->th_sport; - dxport.port = pd->hdr.tcp->th_dport; + dnflow.fwa_id.dst_port = ntohs(pd->hdr.tcp->th_dport); + dnflow.fwa_id.src_port = ntohs(pd->hdr.tcp->th_sport); hdrlen = sizeof (*th); break; case IPPROTO_UDP: - dnflow.fwa_id.dst_port = pd->hdr.udp->uh_dport; - dnflow.fwa_id.src_port = pd->hdr.udp->uh_sport; - sxport.port = pd->hdr.udp->uh_sport; - dxport.port = pd->hdr.udp->uh_dport; + dnflow.fwa_id.dst_port = ntohs(pd->hdr.udp->uh_dport); + dnflow.fwa_id.src_port = ntohs(pd->hdr.udp->uh_sport); hdrlen = sizeof (*pd->hdr.udp); break; #if INET case IPPROTO_ICMP: - if (pd->af != AF_INET) + if (af != AF_INET) break; - sxport.port = dxport.port = pd->hdr.icmp->icmp_id; hdrlen = ICMP_MINLEN; icmptype = pd->hdr.icmp->icmp_type; icmpcode = pd->hdr.icmp->icmp_code; @@ -5155,24 +5146,18 @@ pf_test_dummynet(struct pf_rule **rm, int direction, struct pfi_kif *kif, #endif /* INET */ #if INET6 case IPPROTO_ICMPV6: - if (pd->af != AF_INET6) + if (af != AF_INET6) break; - sxport.port = dxport.port = pd->hdr.icmp6->icmp6_id; hdrlen = sizeof (*pd->hdr.icmp6); icmptype = pd->hdr.icmp6->icmp6_type; icmpcode = pd->hdr.icmp6->icmp6_code; break; #endif /* INET6 */ case IPPROTO_GRE: - if (pd->proto_variant == PF_GRE_PPTP_VARIANT) { - sxport.call_id = dxport.call_id = - 
pd->hdr.grev1->call_id; + if (pd->proto_variant == PF_GRE_PPTP_VARIANT) hdrlen = sizeof (*pd->hdr.grev1); - } break; case IPPROTO_ESP: - sxport.spi = 0; - dxport.spi = pd->hdr.esp->spi; hdrlen = sizeof (*pd->hdr.esp); break; } @@ -5298,10 +5283,21 @@ pf_test_dummynet(struct pf_rule **rm, int direction, struct pfi_kif *kif, dnflow.fwa_cookie = r->dnpipe; dnflow.fwa_pf_rule = r; - dnflow.fwa_id.addr_type = (af == AF_INET) ? 4 : 6; dnflow.fwa_id.proto = pd->proto; dnflow.fwa_flags = r->dntype; - + switch (af) { /* record both endpoints of the flow in the dummynet flow id */ + case AF_INET: + dnflow.fwa_id.addr_type = 4; + dnflow.fwa_id.src_ip = ntohl(saddr->v4.s_addr); + dnflow.fwa_id.dst_ip = ntohl(daddr->v4.s_addr); + break; + case AF_INET6: + dnflow.fwa_id.addr_type = 6; + dnflow.fwa_id.src_ip6 = saddr->v6; + dnflow.fwa_id.dst_ip6 = daddr->v6; + break; + } + if (fwa != NULL) { dnflow.fwa_oif = fwa->fwa_oif; dnflow.fwa_oflags = fwa->fwa_oflags; diff --git a/bsd/netinet/tcp_ledbat.c b/bsd/netinet/tcp_ledbat.c index 1d1d5e5e7..d13dc50bf 100644 --- a/bsd/netinet/tcp_ledbat.c +++ b/bsd/netinet/tcp_ledbat.c @@ -108,15 +108,15 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, bg_target_qdelay, CTLFLAG_RW | CTLFLAG_LOCKE * max_allowed_cwnd = allowed_increase + (tether * flight_size) * cwnd = min(cwnd, max_allowed_cwnd) * - * 'Allowed_increase' parameter is set to 2. If the flight size is zero, then - * we want the congestion window to be at least 2 packets to reduce the - * delay induced by delayed ack. This helps when the receiver is acking every - * other packet. + * 'Allowed_increase' parameter is set to 8. If the flight size is zero, then + * we want the congestion window to be at least 8 packets to reduce the + * delay induced by delayed ack. This helps when the receiver is acking + * more than 2 packets at a time (stretching acks for better performance). * * 'Tether' is also set to 2. We do not want this to limit the growth of cwnd * during slow-start. 
*/ -int allowed_increase = 2; +int allowed_increase = 8; SYSCTL_INT(_net_inet_tcp, OID_AUTO, bg_allowed_increase, CTLFLAG_RW | CTLFLAG_LOCKED, &allowed_increase, 1, "Additive constant used to calculate max allowed congestion window"); diff --git a/bsd/nfs/nfs_node.c b/bsd/nfs/nfs_node.c index 7fc9ddaef..9563cc5de 100644 --- a/bsd/nfs/nfs_node.c +++ b/bsd/nfs/nfs_node.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2013 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -72,6 +72,7 @@ #include #include #include +#include #include #include #include @@ -132,6 +133,48 @@ nfs_hash(u_char *fhp, int fhsize) return (fhsum); } + +int nfs_case_insensitive(mount_t); + +int +nfs_case_insensitive(mount_t mp) +{ + struct nfsmount *nmp = VFSTONFS(mp); + int answer = 0; + int skip = 0; + + if (nmp == NULL) { + return (0); + } + + if (nmp->nm_vers == NFS_VER2) { + /* V2 has no way to know */ + return (0); + } + + lck_mtx_lock(&nmp->nm_lock); + if (nmp->nm_vers == NFS_VER3) { + if (!(nmp->nm_state & NFSSTA_GOTPATHCONF)) { + /* We're holding the node lock so we just return + * with answer as case sensitive. Is very rare + * for file systems not to be homogenous w.r.t. pathconf + */ + skip = 1; + } + } else if (!(nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_HOMOGENEOUS)) { + /* no pathconf info cached */ + skip = 1; + } + + if (!skip && NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_CASE_INSENSITIVE)) + answer = 1; + + lck_mtx_unlock(&nmp->nm_lock); + + return (answer); +} + + /* * Look up a vnode/nfsnode by file handle. * Callers must check for mount points!! @@ -234,6 +277,84 @@ nfs_nget( } else { if (dnp && cnp && (flags & NG_MAKEENTRY)) cache_enter(NFSTOV(dnp), vp, cnp); + /* + * Update the vnode if the name/and or the parent has + * changed. 
We need to do this so that if getattrlist is + * called asking for ATTR_CMN_NAME, that the "most" + * correct name is being returned if we're not making an + * entry. In addition for monitored vnodes we need to + * kick the vnode out of the name cache. We do this so + * that if there are hard links in the same directory + * the link will not be found and a lookup will get us + * here to return the name of the current link. In + * addition by removing the name from the name cache the + * old name will not be found after a rename done on + * another client or the server. The principal reason + * to do this is because Finder is asking for + * notifications on a directory. The directory changes, + * Finder gets notified, reads the directory (which we + * have purged) and for each entry returned calls + * getattrlist with the name returned from + * readdir. getattrlist has to call namei/lookup to + * resolve the name, because it's not in the cache we end + * up here. We need to update the name so Finder will + * get the name it called us with. + * + * We had an imperfect solution with respect to case + * sensitivity. There is a test that is run in + * FileBuster that does renames from some name to + * another name differing only in case. It then reads + * the directory looking for the new name, after it + * finds that new name, it asks getattrlist to verify + * that the name is the new name. Usually that works, + * but renames generate fsevents and fseventsd will do a + * lookup on the name via lstat. Since that test renames + * old name to new name back and forth there is a race + * that an fsevent will be behind and will access the + * file by the old name, on a case insensitive file + * system that will work. Problem is if we do a case + * sensitive compare, we're going to change the name, + * which the test's getattrlist verification step is + * going to fail. So we will check the case sensitivity + * of the file system and do the appropriate compare. 
In + * a rare instance for non homogeneous file systems + * w.r.t. pathconf we will use case sensitive compares. + * That could break if the file system is actually case + * insensitive. + * + * Note that V2 does not know the case, so we just + * assume case sensitivity. + * + * This is clearly not perfect due to races, but this is + * as good as it's going to get. You can defeat the + * handling of hard links simply by doing: + * + * while :; do ls -l > /dev/null; done + * + * in a terminal window. Even a single ls -l can cause a + * race. + * + * What we really need is for the caller, that + * knows the name being used is valid since it got it + * from a readdir to use that name and not ask for the + * ATTR_CMN_NAME. (The compare below is bounded by cn_namelen, so it also checks that the cached name ends there.) + */ + if (dnp && cnp && (vp != NFSTOV(dnp))) { + int update_flags = vnode_ismonitored((NFSTOV(dnp))) ? VNODE_UPDATE_CACHE : 0; + int (*cmp)(const char *s1, const char *s2, size_t n); + + cmp = nfs_case_insensitive(mp) ? strncasecmp : strncmp; + + if (vp->v_name && cnp->cn_namelen && ((*cmp)(cnp->cn_nameptr, vp->v_name, cnp->cn_namelen) || vp->v_name[cnp->cn_namelen] != '\0')) + update_flags |= VNODE_UPDATE_NAME; + if ((vp->v_name == NULL && cnp->cn_namelen != 0) || (vp->v_name != NULL && cnp->cn_namelen == 0)) + update_flags |= VNODE_UPDATE_NAME; + if (vnode_parent(vp) != NFSTOV(dnp)) + update_flags |= VNODE_UPDATE_PARENT; + if (update_flags) + vnode_update_identity(vp, NFSTOV(dnp), cnp->cn_nameptr, cnp->cn_namelen, 0, update_flags); + } + *npp = np; } FSDBG_BOT(263, dnp, *npp, 0xcace0000, error); diff --git a/bsd/sys/cprotect.h b/bsd/sys/cprotect.h index eb0a134fd..0dda075ac 100644 --- a/bsd/sys/cprotect.h +++ b/bsd/sys/cprotect.h @@ -65,9 +65,6 @@ extern "C" { #define CP_READ_ACCESS 0x1 #define CP_WRITE_ACCESS 0x2 -/* - * Check for this version when deciding to enable features - */ #define CONTENT_PROTECTION_XATTR_NAME "com.apple.system.cprotect" #define CP_NEW_MAJOR_VERS 4 #define CP_PREV_MAJOR_VERS 2 diff --git a/bsd/sys/vnode_internal.h b/bsd/sys/vnode_internal.h index 
e846ac679..af27d9513 100644 --- a/bsd/sys/vnode_internal.h +++ b/bsd/sys/vnode_internal.h @@ -456,6 +456,7 @@ errno_t vnode_makenamedstream(vnode_t, vnode_t *, const char *, int, vfs_contex errno_t vnode_removenamedstream(vnode_t, vnode_t, const char *, int, vfs_context_t); errno_t vnode_flushnamedstream(vnode_t vp, vnode_t svp, vfs_context_t context); errno_t vnode_relenamedstream(vnode_t vp, vnode_t svp, vfs_context_t context); +errno_t vnode_verifynamedstream (vnode_t vp, vfs_context_t ctx); #endif diff --git a/bsd/vfs/vfs_lookup.c b/bsd/vfs/vfs_lookup.c index 2c225cf19..02e1576c1 100644 --- a/bsd/vfs/vfs_lookup.c +++ b/bsd/vfs/vfs_lookup.c @@ -645,7 +645,19 @@ lookup_handle_found_vnode(struct nameidata *ndp, struct componentname *cnp, int } ndp->ni_flag &= ~(NAMEI_TRAILINGSLASH); } - + +#if NAMEDSTREAMS + /* + * Deny namei/lookup requests to resolve paths that point to shadow files. + * Access to shadow files must be conducted by explicit calls to VNOP_LOOKUP + * directly, and not use lookup/namei + */ + if (vnode_isshadow (dp)) { + error = ENOENT; + goto out; + } +#endif + nextname: /* * Not a symbolic link. If more pathname, diff --git a/bsd/vfs/vfs_subr.c b/bsd/vfs/vfs_subr.c index 1c8bfc50e..84fa6ff28 100644 --- a/bsd/vfs/vfs_subr.c +++ b/bsd/vfs/vfs_subr.c @@ -5460,7 +5460,23 @@ vn_authorize_open_existing(vnode_t vp, struct componentname *cnp, int fmode, vfs action |= KAUTH_VNODE_WRITE_DATA; } } - return (vnode_authorize(vp, NULL, action, ctx)); + error = vnode_authorize(vp, NULL, action, ctx); + +#if NAMEDSTREAMS + if (error == EACCES) { + /* + * Shadow files may exist on-disk with a different UID/GID + * than that of the current context. Verify that this file + * is really a shadow file. If it was created successfully + * then it should be authorized. 
+ */ + if (vnode_isshadow(vp) && vnode_isnamedstream (vp)) { + error = vnode_verifynamedstream(vp, ctx); + } + } +#endif + + return error; } int diff --git a/bsd/vfs/vfs_syscalls.c b/bsd/vfs/vfs_syscalls.c index 36c3c5e3f..7e68e2982 100644 --- a/bsd/vfs/vfs_syscalls.c +++ b/bsd/vfs/vfs_syscalls.c @@ -8120,9 +8120,12 @@ vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx) return error; } - /* call out to allow 3rd party notification of open. + /* Call out to allow 3rd party notification of open. * Ignore result of kauth_authorize_fileop call. */ +#if CONFIG_MACF + mac_vnode_notify_open(ctx, vp, fmode); +#endif kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN, (uintptr_t)vp, 0); diff --git a/bsd/vfs/vfs_vnops.c b/bsd/vfs/vfs_vnops.c index 47552a0d8..671a51fc3 100644 --- a/bsd/vfs/vfs_vnops.c +++ b/bsd/vfs/vfs_vnops.c @@ -177,9 +177,12 @@ vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx) goto bad; } - /* call out to allow 3rd party notification of open. + /* Call out to allow 3rd party notification of open. * Ignore result of kauth_authorize_fileop call. */ +#if CONFIG_MACF + mac_vnode_notify_open(ctx, vp, fmode); +#endif kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN, (uintptr_t)vp, 0); diff --git a/bsd/vfs/vfs_xattr.c b/bsd/vfs/vfs_xattr.c index 94715ae55..c4587268f 100644 --- a/bsd/vfs/vfs_xattr.c +++ b/bsd/vfs/vfs_xattr.c @@ -627,6 +627,54 @@ vnode_flushnamedstream(vnode_t vp, vnode_t svp, vfs_context_t context) return (error); } +/* + * Verify that the vnode 'vp' is a vnode that lives in the shadow + * directory. We can't just query the parent pointer directly since + * the shadowfile is hooked up to the actual file it's a stream for. 
+ */ +errno_t vnode_verifynamedstream(vnode_t vp, vfs_context_t context) { + int error; + struct vnode *shadow_dvp = NULL; + struct vnode *shadowfile = NULL; + struct componentname cn; + char tmpname[80]; + + + /* Get the shadow directory vnode */ + error = get_shadow_dir(&shadow_dvp, context); + if (error) { + return error; + } + + /* Re-generate the shadow name in the buffer */ + MAKE_SHADOW_NAME (vp, tmpname); + + /* Look up item in shadow dir; a failed lookup fails the verification with the lookup's error */ + bzero(&cn, sizeof(cn)); + cn.cn_nameiop = LOOKUP; + cn.cn_flags = ISLASTCN | CN_ALLOWRSRCFORK; + cn.cn_context = context; + cn.cn_pnbuf = tmpname; + cn.cn_pnlen = sizeof(tmpname); + cn.cn_nameptr = cn.cn_pnbuf; + cn.cn_namelen = strlen(tmpname); + + if ((error = VNOP_LOOKUP (shadow_dvp, &shadowfile, &cn, context)) == 0) { + /* is the pointer the same? */ + if (shadowfile == vp) { + error = 0; + } + else { + error = EPERM; + } + /* drop the iocount acquired */ + vnode_put (shadowfile); + } + + /* Drop iocount on shadow dir */ + vnode_put (shadow_dvp); + return error; +} static int getshadowfile(vnode_t vp, vnode_t *svpp, int makestream, size_t *rsrcsize, diff --git a/config/MasterVersion b/config/MasterVersion index 84a48a017..80a99e0df 100644 --- a/config/MasterVersion +++ b/config/MasterVersion @@ -1,4 +1,4 @@ -12.3.0 +12.4.0 # The first line of this file contains the master version number for the kernel. # All other instances of the kernel version in xnu are derived from this file. 
diff --git a/iokit/IOKit/IOService.h b/iokit/IOKit/IOService.h index 9c68d22a5..f3d1bed4a 100644 --- a/iokit/IOKit/IOService.h +++ b/iokit/IOKit/IOService.h @@ -1744,6 +1744,7 @@ class IOService : public IORegistryEntry protected: bool tellClientsWithResponse( int messageType ); void tellClients( int messageType ); + void PMDebug( uint32_t event, uintptr_t param1, uintptr_t param2 ); private: #ifndef __LP64__ diff --git a/iokit/IOKit/pwr_mgt/IOPMlog.h b/iokit/IOKit/pwr_mgt/IOPMlog.h index b9d50eda5..2f82c1b4e 100644 --- a/iokit/IOKit/pwr_mgt/IOPMlog.h +++ b/iokit/IOKit/pwr_mgt/IOPMlog.h @@ -26,58 +26,59 @@ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ enum PMLogEnum { - kPMLogSetParent = 1, // 1 0x05100004 - kPMLogAddChild, // 2 0x05100008 - kPMLogRemoveChild, // 3 0x0510000c - kPMLogControllingDriver, // 4 0x05100010 - kPMLogControllingDriverErr1, // 5 0x05100014 - bad power state array version - kPMLogControllingDriverErr2, // 6 0x05100018 - power states already registered - kPMLogControllingDriverErr3, // 7 0x0510001c - kPMLogControllingDriverErr4, // 8 0x05100020 - power driver is invalid - kPMLogInterestedDriver, // 9 0x05100024 - kPMLogAcknowledgeErr1, // 10 0x05100028 - unknown entity called acknowledgePowerChange - kPMLogChildAcknowledge, // 11 0x0510002c - kPMLogDriverAcknowledge, // 12 0x05100030 - interested driver acknowledges - kPMLogAcknowledgeErr2, // 13 0x05100034 - object has already acked - kPMLogAcknowledgeErr3, // 14 0x05100038 - not expecting any acks - kPMLogAcknowledgeErr4, // 15 0x0510003c - not expecting acknowledgeSetPowerState - kPMLogDriverAcknowledgeSet, // 16 0x05100040 - controlling driver acknowledges - kPMLogWillChange, // 17 0x05100044 - kPMLogDidChange, // 18 0x05100048 - kPMLogRequestDomain, // 19 0x0510004c - kPMLogMakeUsable, // 20 0x05100050 - kPMLogChangeStateTo, // 21 0x05100054 - kPMLogChangeStateToPriv, // 22 0x05100058 - kPMLogSetAggressiveness, // 23 0x0510005c - kPMLogCriticalTemp, // 24 0x05100060 - kPMLogOverrideOn, // 
25 0x05100064 - kPMLogOverrideOff, // 26 0x05100068 - kPMLogEnqueueErr, // 27 0x0510006c - NOT USED - kPMLogCollapseQueue, // 28 0x05100070 - NOT USED - kPMLogChangeDone, // 29 0x05100074 - kPMLogCtrlDriverTardy, // 30 0x05100078 - controlling driver didn't acknowledge - kPMLogIntDriverTardy, // 31 0x0510007c - interested driver didn't acknowledge - kPMLogStartAckTimer, // 32 0x05100080 - kPMLogStartParentChange, // 33 0x05100084 - kPMLogAmendParentChange, // 34 0x05100088 - kPMLogStartDeviceChange, // 35 0x0510008c - kPMLogRequestDenied, // 36 0x05100090 - parent denied domain state change request - kPMLogControllingDriverErr5, // 37 0x05100094 - too few power states - kPMLogProgramHardware, // 38 0x05100098 - kPMLogInformDriverPreChange, // 39 0x0510009c - kPMLogInformDriverPostChange, // 40 0x051000a0 - kPMLogRemoveDriver, // 41 0x051000a4 - NOT USED - kPMLogSetIdleTimerPeriod, // 42 0x051000a8 - kPMLogSystemWake, // 43 0x051000ac - kPMLogAcknowledgeErr5, // 44 0x051000b0 - kPMLogClientAcknowledge, // 45 0x051000b4 - kPMLogClientTardy, // 46 0x051000b8 - application didn't acknowledge - kPMLogClientCancel, // 47 0x051000bc - NOT USED - kPMLogClientNotify, // 48 0x051000c0 - client sent a notification - kPMLogAppNotify, // 49 0x051000c4 - application sent a notification - kPMLogSetClockGating, // 50 0x051000c8 - NOT USED - kPMLogSetPowerGating, // 51 0x051000cc - NOT USED - kPMLogSetPinGroup, // 52 0x051000d0 - NOT USED - kPMLogIdleCancel, // 53 0x051000d4 - device unidle during change + kPMLogSetParent = 1, // 1 0x05070004 + kPMLogAddChild, // 2 0x05070008 + kPMLogRemoveChild, // 3 0x0507000c + kPMLogControllingDriver, // 4 0x05070010 + kPMLogControllingDriverErr1, // 5 0x05070014 - bad power state array version + kPMLogControllingDriverErr2, // 6 0x05070018 - power states already registered + kPMLogControllingDriverErr3, // 7 0x0507001c + kPMLogControllingDriverErr4, // 8 0x05070020 - power driver is invalid + kPMLogInterestedDriver, // 9 0x05070024 + 
kPMLogAcknowledgeErr1, // 10 0x05070028 - unknown entity called acknowledgePowerChange + kPMLogChildAcknowledge, // 11 0x0507002c + kPMLogDriverAcknowledge, // 12 0x05070030 - interested driver acknowledges + kPMLogAcknowledgeErr2, // 13 0x05070034 - object has already acked + kPMLogAcknowledgeErr3, // 14 0x05070038 - not expecting any acks + kPMLogAcknowledgeErr4, // 15 0x0507003c - not expecting acknowledgeSetPowerState + kPMLogDriverAcknowledgeSet, // 16 0x05070040 - controlling driver acknowledges + kPMLogWillChange, // 17 0x05070044 + kPMLogDidChange, // 18 0x05070048 + kPMLogRequestDomain, // 19 0x0507004c + kPMLogMakeUsable, // 20 0x05070050 + kPMLogChangeStateTo, // 21 0x05070054 + kPMLogChangeStateToPriv, // 22 0x05070058 + kPMLogSetAggressiveness, // 23 0x0507005c + kPMLogCriticalTemp, // 24 0x05070060 + kPMLogOverrideOn, // 25 0x05070064 + kPMLogOverrideOff, // 26 0x05070068 + kPMLogEnqueueErr, // 27 0x0507006c - NOT USED + kPMLogCollapseQueue, // 28 0x05070070 - NOT USED + kPMLogChangeDone, // 29 0x05070074 + kPMLogCtrlDriverTardy, // 30 0x05070078 - controlling driver didn't acknowledge + kPMLogIntDriverTardy, // 31 0x0507007c - interested driver didn't acknowledge + kPMLogStartAckTimer, // 32 0x05070080 + kPMLogStartParentChange, // 33 0x05070084 + kPMLogAmendParentChange, // 34 0x05070088 + kPMLogStartDeviceChange, // 35 0x0507008c + kPMLogRequestDenied, // 36 0x05070090 - parent denied domain state change request + kPMLogControllingDriverErr5, // 37 0x05070094 - too few power states + kPMLogProgramHardware, // 38 0x05070098 + kPMLogInformDriverPreChange, // 39 0x0507009c + kPMLogInformDriverPostChange, // 40 0x050700a0 + kPMLogRemoveDriver, // 41 0x050700a4 - NOT USED + kPMLogSetIdleTimerPeriod, // 42 0x050700a8 + kPMLogSystemWake, // 43 0x050700ac + kPMLogAcknowledgeErr5, // 44 0x050700b0 + kPMLogClientAcknowledge, // 45 0x050700b4 + kPMLogClientTardy, // 46 0x050700b8 - application didn't acknowledge + kPMLogClientCancel, // 47 0x050700bc - NOT 
USED + kPMLogClientNotify, // 48 0x050700c0 - client sent a notification + kPMLogAppNotify, // 49 0x050700c4 - application sent a notification + kPMLogSetClockGating, // 50 0x050700c8 - NOT USED + kPMLogSetPowerGating, // 51 0x050700cc - NOT USED + kPMLogSetPinGroup, // 52 0x050700d0 - NOT USED + kPMLogIdleCancel, // 53 0x050700d4 - device unidle during change + kPMLogSleepWakeTracePoint, // 54 0x050700d8 - kIOPMTracePoint markers kIOPMlogLastEvent }; diff --git a/iokit/Kernel/IOPMrootDomain.cpp b/iokit/Kernel/IOPMrootDomain.cpp index dd6d10f2a..b3d6e1f44 100644 --- a/iokit/Kernel/IOPMrootDomain.cpp +++ b/iokit/Kernel/IOPMrootDomain.cpp @@ -2229,7 +2229,7 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) #endif // log system wake - getPlatform()->PMLog(kIOPMrootDomainClass, kPMLogSystemWake, 0, 0); + PMDebug(kPMLogSystemWake, 0, 0); lowBatteryCondition = false; lastSleepReason = 0; @@ -6654,6 +6654,7 @@ void IOPMrootDomain::tracePoint( uint8_t point ) { if (systemBooting) return; + PMDebug(kPMLogSleepWakeTracePoint, point, 0); pmTracer->tracePoint(point); #if HIBERNATION @@ -6663,8 +6664,10 @@ void IOPMrootDomain::tracePoint( uint8_t point ) void IOPMrootDomain::tracePoint( uint8_t point, uint8_t data ) { - if (!systemBooting) - pmTracer->tracePoint(point, data); + if (systemBooting) return; + + PMDebug(kPMLogSleepWakeTracePoint, point, data); + pmTracer->tracePoint(point, data); } void IOPMrootDomain::traceDetail( uint32_t detail ) diff --git a/iokit/Kernel/IOPlatformExpert.cpp b/iokit/Kernel/IOPlatformExpert.cpp index e7f393474..29c286ea6 100644 --- a/iokit/Kernel/IOPlatformExpert.cpp +++ b/iokit/Kernel/IOPlatformExpert.cpp @@ -373,48 +373,14 @@ void IOPlatformExpert:: PMLog(const char *who, unsigned long event, unsigned long param1, unsigned long param2) { - UInt32 debugFlags = gIOKitDebug; - UInt32 traceFlags = gIOKitTrace; - uintptr_t name = 0; - UInt32 i = 0; - - if (debugFlags & kIOLogPower) { - clock_sec_t nows; clock_usec_t nowus; 
clock_get_system_microtime(&nows, &nowus); nowus += (nows % 1000) * 1000000; - kprintf("pm%u %p %.30s %d %lx %lx\n", + kprintf("pm%u %p %.30s %d %lx %lx\n", nowus, current_thread(), who, // Identity (int) event, (long) param1, (long) param2); // Args - - if (traceFlags & kIOTracePowerMgmt) { - static const UInt32 sStartStopBitField[] = - { 0x00000000, 0x00000040 }; // Only Program Hardware so far - - // Arcane formula from Hacker's Delight by Warren - // abs(x) = ((int) x >> 31) ^ (x + ((int) x >> 31)) - UInt32 sgnevent = ((long) event >> 31); - UInt32 absevent = sgnevent ^ (event + sgnevent); - UInt32 code = IODBG_POWER(absevent); - - UInt32 bit = 1 << (absevent & 0x1f); - if (absevent < sizeof(sStartStopBitField) * 8 - && (sStartStopBitField[absevent >> 5] & bit) ) { - // Or in the START or END bits, Start = 1 & END = 2 - // If sgnevent == 0 then START - 0 => START - // else if sgnevent == -1 then START - -1 => END - code |= DBG_FUNC_START - sgnevent; - } - - // Get first 8 characters of the name - while ( i < sizeof(uintptr_t) && who[i] != 0) - { ((char *)&name)[sizeof(uintptr_t)-i-1]=who[i]; i++; } - // Record the timestamp. 
- IOTimeStampConstant(code, name, event, param1, param2); - } - } } diff --git a/iokit/Kernel/IOServicePM.cpp b/iokit/Kernel/IOServicePM.cpp index e46b02482..97711e1c8 100644 --- a/iokit/Kernel/IOServicePM.cpp +++ b/iokit/Kernel/IOServicePM.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -153,8 +154,12 @@ do { \ #define kIOPMTardyAckPSCKey "IOPMTardyAckPowerStateChange" #define kPwrMgtKey "IOPowerManagement" -#define OUR_PMLog(t, a, b) \ - do { gPlatform->PMLog( fName, t, a, b); } while(0) +#define OUR_PMLog(t, a, b) do { \ + if (gIOKitDebug & kIOLogPower) \ + pwrMgt->pmPrint(t, a, b); \ + if (gIOKitTrace & kIOTracePowerMgmt) \ + pwrMgt->pmTrace(t, a, b); \ + } while(0) #define NS_TO_MS(nsec) ((int)((nsec) / 1000000ULL)) #define NS_TO_US(nsec) ((int)((nsec) / 1000ULL)) @@ -675,6 +680,11 @@ void IOService::PMfree ( void ) } } +void IOService::PMDebug( uint32_t event, uintptr_t param1, uintptr_t param2 ) +{ + OUR_PMLog(event, param1, param2); +} + //********************************************************************************* // [public] joinPMtree // @@ -4130,7 +4140,7 @@ void IOService::all_done ( void ) #if PM_VARS_SUPPORT fPMVars->myCurrentState = fCurrentPowerState; #endif - OUR_PMLog(kPMLogChangeDone, fCurrentPowerState, 0); + OUR_PMLog(kPMLogChangeDone, fCurrentPowerState, prevPowerState); PM_ACTION_2(actionPowerChangeDone, fHeadNotePowerState, fHeadNoteChangeFlags); callAction = true; @@ -4180,7 +4190,7 @@ void IOService::all_done ( void ) #endif fMaxPowerState = fControllingDriver->maxCapabilityForDomainState(fHeadNoteDomainFlags); - OUR_PMLog(kPMLogChangeDone, fCurrentPowerState, 0); + OUR_PMLog(kPMLogChangeDone, fCurrentPowerState, prevPowerState); PM_ACTION_2(actionPowerChangeDone, fHeadNotePowerState, fHeadNoteChangeFlags); callAction = true; @@ -7987,6 +7997,51 @@ bool IOServicePM::serialize( OSSerialize * s ) const return (kIOReturnSuccess == ret); } +void IOServicePM::pmPrint( + uint32_t event, + uintptr_t param1, + 
uintptr_t param2 ) const +{ + gPlatform->PMLog(Name, event, param1, param2); +} + +void IOServicePM::pmTrace( + uint32_t event, + uintptr_t param1, + uintptr_t param2 ) const +{ + const char * who = Name; + uint64_t regId = Owner->getRegistryEntryID(); + uintptr_t name = 0; + + static const uint32_t sStartStopBitField[] = + { 0x00000000, 0x00000040 }; // Only Program Hardware so far + + // Arcane formula from Hacker's Delight by Warren + // abs(x) = ((int) x >> 31) ^ (x + ((int) x >> 31)) + uint32_t sgnevent = ((int) event >> 31); + uint32_t absevent = sgnevent ^ (event + sgnevent); + uint32_t code = IODBG_POWER(absevent); + + uint32_t bit = 1 << (absevent & 0x1f); + if ((absevent < (sizeof(sStartStopBitField) * 8)) && + (sStartStopBitField[absevent >> 5] & bit)) + { + // Or in the START or END bits, Start = 1 & END = 2 + // If sgnevent == 0 then START - 0 => START + // else if sgnevent == -1 then START - -1 => END + code |= DBG_FUNC_START - sgnevent; + } + + // Copy the first characters of the name into an uintptr_t + for (uint32_t i = 0; (i < sizeof(uintptr_t) && who[i] != 0); i++) + { + ((char *) &name)[sizeof(uintptr_t) - i - 1] = who[i]; + } + + IOTimeStampConstant(code, name, (uintptr_t) regId, param1, param2); +} + PMEventDetails* PMEventDetails::eventDetails(uint32_t type, const char *ownerName, uintptr_t ownerUnique, diff --git a/iokit/Kernel/IOServicePMPrivate.h b/iokit/Kernel/IOServicePMPrivate.h index bdb74358b..47f99ea45 100644 --- a/iokit/Kernel/IOServicePMPrivate.h +++ b/iokit/Kernel/IOServicePMPrivate.h @@ -347,6 +347,10 @@ class IOServicePM : public OSObject // Serialize IOServicePM state for debug output. 
IOReturn gatedSerialize( OSSerialize * s ); virtual bool serialize( OSSerialize * s ) const; + + // PM log and trace + void pmPrint( uint32_t event, uintptr_t param1, uintptr_t param2 ) const; + void pmTrace( uint32_t event, uintptr_t param1, uintptr_t param2 ) const; }; #define fOwner pwrMgt->Owner diff --git a/iokit/Kernel/IOTimerEventSource.cpp b/iokit/Kernel/IOTimerEventSource.cpp index c71feccf0..e42fa436c 100644 --- a/iokit/Kernel/IOTimerEventSource.cpp +++ b/iokit/Kernel/IOTimerEventSource.cpp @@ -42,6 +42,10 @@ __END_DECLS #include #include +#if CONFIG_DTRACE +#include +#endif + #define super IOEventSource OSDefineMetaClassAndStructors(IOTimerEventSource, IOEventSource) OSMetaClassDefineReservedUnused(IOTimerEventSource, 0); @@ -117,6 +121,9 @@ void IOTimerEventSource::timeout(void *self) (uintptr_t) doit, (uintptr_t) me->owner); (*doit)(me->owner, me); +#if CONFIG_DTRACE + DTRACE_TMR3(iotescallout__expire, Action, doit, OSObject, me->owner, void, me->workLoop); +#endif if (trace) IOTimeStampEndConstant(IODBG_TIMES(IOTIMES_ACTION), @@ -156,6 +163,9 @@ void IOTimerEventSource::timeoutAndRelease(void * self, void * c) (uintptr_t) doit, (uintptr_t) me->owner); (*doit)(me->owner, me); +#if CONFIG_DTRACE + DTRACE_TMR3(iotescallout__expire, Action, doit, OSObject, me->owner, void, me->workLoop); +#endif if (trace) IOTimeStampEndConstant(IODBG_TIMES(IOTIMES_ACTION), diff --git a/iokit/Kernel/IOWorkLoop.cpp b/iokit/Kernel/IOWorkLoop.cpp index d2c28b043..047a01fe8 100644 --- a/iokit/Kernel/IOWorkLoop.cpp +++ b/iokit/Kernel/IOWorkLoop.cpp @@ -34,6 +34,7 @@ #include #include #include +#include #define super OSObject @@ -177,6 +178,7 @@ bool IOWorkLoop::init() return false; } + (void) thread_set_tag(workThread, THREAD_TAG_IOWORKLOOP); return true; } diff --git a/osfmk/device/iokit_rpc.c b/osfmk/device/iokit_rpc.c index 1b6c4d4ec..978cb5ead 100644 --- a/osfmk/device/iokit_rpc.c +++ b/osfmk/device/iokit_rpc.c @@ -436,6 +436,8 @@ iokit_no_senders( 
mach_no_senders_notification_t * notification ) /* port unlocked */ if ( notify != IP_NULL) ipc_port_release_sonce(notify); + } else { + ip_unlock(port); } } iokit_remove_reference( obj ); diff --git a/osfmk/i386/Diagnostics.c b/osfmk/i386/Diagnostics.c index d290456ce..0921ad575 100644 --- a/osfmk/i386/Diagnostics.c +++ b/osfmk/i386/Diagnostics.c @@ -75,6 +75,26 @@ diagWork dgWork; uint64_t lastRuptClear = 0ULL; +void cpu_powerstats(void *); + +typedef struct { + uint64_t caperf; + uint64_t cmperf; + uint64_t ccres[3]; + uint64_t crtimes[4]; + uint64_t citimes[4]; + uint64_t crtime_total; + uint64_t citime_total; +} core_energy_stat_t; + +typedef struct { + uint64_t pkg_cres[2][4]; + uint64_t pkg_power_unit; + uint64_t pkg_energy; + uint32_t ncpus; + core_energy_stat_t cest[]; +} pkg_energy_statistics_t; + int diagCall64(x86_saved_state_t * state) @@ -84,6 +104,7 @@ diagCall64(x86_saved_state_t * state) uint64_t currNap, durNap; x86_saved_state64_t *regs; boolean_t diagflag; + uint32_t rval = 0; assert(is_saved_state64(state)); regs = saved_state64(state); @@ -93,8 +114,6 @@ diagCall64(x86_saved_state_t * state) switch (selector) { /* Select the routine */ case dgRuptStat: /* Suck Interruption statistics */ (void) ml_set_interrupts_enabled(TRUE); - if (diagflag == 0) - break; data = regs->rsi; /* Get the number of processors */ if (data == 0) { /* If no location is specified for data, clear all @@ -107,7 +126,8 @@ diagCall64(x86_saved_state_t * state) } lastRuptClear = mach_absolute_time(); /* Get the time of clear */ - return 1; /* Normal return */ + rval = 1; /* Normal return */ + break; } (void) copyout((char *) &real_ncpus, data, sizeof(real_ncpus)); /* Copy out number of @@ -132,8 +152,63 @@ diagCall64(x86_saved_state_t * state) curpos = curpos + (256 * sizeof(uint32_t) + 8); /* Point to next out put * slot */ } - return 1; + rval = 1; break; + case dgPowerStat: + { + uint32_t c2l = 0, c2h = 0, c3l = 0, c3h = 0, c6l = 0, c6h = 0, c7l = 0, c7h = 0; + uint32_t 
pkg_unit_l = 0, pkg_unit_h = 0, pkg_ecl = 0, pkg_ech = 0; + + pkg_energy_statistics_t pkes; + core_energy_stat_t cest; + + bzero(&pkes, sizeof(pkes)); + bzero(&cest, sizeof(cest)); + + rdmsr_carefully(MSR_IA32_PKG_C2_RESIDENCY, &c2l, &c2h); + rdmsr_carefully(MSR_IA32_PKG_C3_RESIDENCY, &c3l, &c3h); + rdmsr_carefully(MSR_IA32_PKG_C6_RESIDENCY, &c6l, &c6h); + rdmsr_carefully(MSR_IA32_PKG_C7_RESIDENCY, &c7l, &c7h); + + pkes.pkg_cres[0][0] = ((uint64_t)c2h << 32) | c2l; + pkes.pkg_cres[0][1] = ((uint64_t)c3h << 32) | c3l; + pkes.pkg_cres[0][2] = ((uint64_t)c6h << 32) | c6l; + pkes.pkg_cres[0][3] = ((uint64_t)c7h << 32) | c7l; + + rdmsr_carefully(MSR_IA32_PKG_POWER_SKU_UNIT, &pkg_unit_l, &pkg_unit_h); + rdmsr_carefully(MSR_IA32_PKG_ENERGY_STATUS, &pkg_ecl, &pkg_ech); + + pkes.pkg_power_unit = ((uint64_t)pkg_unit_h << 32) | pkg_unit_l; + pkes.pkg_energy = ((uint64_t)pkg_ech << 32) | pkg_ecl; + + pkes.ncpus = real_ncpus; + + (void) ml_set_interrupts_enabled(TRUE); + + copyout(&pkes, regs->rsi, sizeof(pkes)); + curpos = regs->rsi + sizeof(pkes); + + mp_cpus_call(CPUMASK_ALL, ASYNC, cpu_powerstats, NULL); + + for (i = 0; i < real_ncpus; i++) { + cest.caperf = cpu_data_ptr[i]->cpu_aperf; + cest.cmperf = cpu_data_ptr[i]->cpu_mperf; + cest.ccres[0] = cpu_data_ptr[i]->cpu_c3res; + cest.ccres[1] = cpu_data_ptr[i]->cpu_c6res; + cest.ccres[2] = cpu_data_ptr[i]->cpu_c7res; + + bcopy(&cpu_data_ptr[i]->cpu_rtimes[0], &cest.crtimes[0], sizeof(cest.crtimes)); + bcopy(&cpu_data_ptr[i]->cpu_itimes[0], &cest.citimes[0], sizeof(cest.citimes)); + cest.citime_total = cpu_data_ptr[i]->cpu_itime_total; + cest.crtime_total = cpu_data_ptr[i]->cpu_rtime_total; + + copyout(&cest, curpos, sizeof(cest)); + curpos += sizeof(cest); + } + rval = 1; + } + break; + #if DEBUG case dgGzallocTest: { @@ -155,14 +230,40 @@ diagCall64(x86_saved_state_t * state) if (diagflag == 0) break; - return pmap_permissions_verify(kernel_pmap, kernel_map, 0, ~0ULL); + rval = pmap_permissions_verify(kernel_pmap, kernel_map, 
0, ~0ULL); } break; #endif /* __x86_64__*/ default: /* Handle invalid ones */ - return 0; /* Return an exception */ + rval = 0; /* Return an exception */ } - return 1; /* Normal non-ast check return */ + regs->rax = rval; + + return rval; /* Normal non-ast check return */ +} + +void cpu_powerstats(__unused void *arg) { + cpu_data_t *cdp = current_cpu_datap(); + int cnum = cdp->cpu_number; + uint32_t cl = 0, ch = 0, mpl = 0, mph = 0, apl = 0, aph = 0; + + rdmsr_carefully(MSR_IA32_MPERF, &mpl, &mph); + rdmsr_carefully(MSR_IA32_APERF, &apl, &aph); + + cdp->cpu_mperf = ((uint64_t)mph << 32) | mpl; + cdp->cpu_aperf = ((uint64_t)aph << 32) | apl; + + if (cnum & 1) + return; + + rdmsr_carefully(MSR_IA32_CORE_C3_RESIDENCY, &cl, &ch); + cdp->cpu_c3res = ((uint64_t)ch << 32) | cl; + + rdmsr_carefully(MSR_IA32_CORE_C6_RESIDENCY, &cl, &ch); + cdp->cpu_c6res = ((uint64_t)ch << 32) | cl; + + rdmsr_carefully(MSR_IA32_CORE_C7_RESIDENCY, &cl, &ch); + cdp->cpu_c7res = ((uint64_t)ch << 32) | cl; } diff --git a/osfmk/i386/Diagnostics.h b/osfmk/i386/Diagnostics.h index e2c264a8b..2ce145e27 100644 --- a/osfmk/i386/Diagnostics.h +++ b/osfmk/i386/Diagnostics.h @@ -59,14 +59,14 @@ int diagCall64(x86_saved_state_t *regs); #define dgBootScreen 7 #define dgFlush 8 #define dgAlign 9 -#define dgprw 10 +#define dgGzallocTest 10 #define dgmck 11 #define dg64 12 #define dgProbeRead 13 #define dgCPNull 14 #define dgPerfMon 15 #define dgMapPage 16 -#define dgGzallocTest 17 +#define dgPowerStat 17 #define dgBind 18 #define dgAcntg 20 #define dgKlra 21 diff --git a/osfmk/i386/cpu_data.h b/osfmk/i386/cpu_data.h index 3473fbd3a..a501be695 100644 --- a/osfmk/i386/cpu_data.h +++ b/osfmk/i386/cpu_data.h @@ -239,6 +239,16 @@ typedef struct cpu_data uint64_t cpu_pmap_pcid_preserves; #endif #endif /* x86_64 */ + uint64_t cpu_aperf; + uint64_t cpu_mperf; + uint64_t cpu_c3res; + uint64_t cpu_c6res; + uint64_t cpu_c7res; + uint64_t cpu_itime_total; + uint64_t cpu_rtime_total; + uint64_t cpu_rtimes[4]; + uint64_t 
cpu_itimes[4]; + uint64_t cpu_ixtime; uint64_t cpu_max_observed_int_latency; int cpu_max_observed_int_latency_vector; uint64_t debugger_entry_time; diff --git a/osfmk/i386/cpu_topology.h b/osfmk/i386/cpu_topology.h index 77445d9b1..ff109f927 100644 --- a/osfmk/i386/cpu_topology.h +++ b/osfmk/i386/cpu_topology.h @@ -207,6 +207,7 @@ typedef struct x86_pkg void *pmStats; /* Power Management stats for package*/ void *pmState; /* Power Management state for package*/ struct mca_state *mca_state; /* MCA state for memory errors */ + uint32_t num_idle; } x86_pkg_t; extern x86_pkg_t *x86_pkgs; /* root of all CPU packages */ diff --git a/osfmk/i386/fpu.c b/osfmk/i386/fpu.c index 84f860b5b..1119a0a73 100644 --- a/osfmk/i386/fpu.c +++ b/osfmk/i386/fpu.c @@ -562,7 +562,7 @@ fpu_set_fxstate( iavx->fp_save_layout = thread_is_64bit(thr_act) ? XSAVE64 : XSAVE32; /* Sanitize XSAVE header */ bzero(&iavx->_xh.xhrsvd[0], sizeof(iavx->_xh.xhrsvd)); - if (state_size == sizeof(struct x86_avx_thread_state)) + if (fpu_nyreg) iavx->_xh.xsbv = (XFEM_YMM | XFEM_SSE | XFEM_X87); else iavx->_xh.xsbv = (XFEM_SSE | XFEM_X87); diff --git a/osfmk/i386/machine_routines.c b/osfmk/i386/machine_routines.c index 22eae0159..e008a9a61 100644 --- a/osfmk/i386/machine_routines.c +++ b/osfmk/i386/machine_routines.c @@ -248,6 +248,15 @@ boolean_t ml_at_interrupt_context(void) return get_interrupt_level() != 0; } +void ml_get_power_state(boolean_t *icp, boolean_t *pidlep) { + *icp = (get_interrupt_level() != 0); + /* These will be technically inaccurate for interrupts that occur + * successively within a single "idle exit" event, but shouldn't + * matter statistically. 
+ */ + *pidlep = (current_cpu_datap()->lcpu.package->num_idle == topoParms.nLThreadsPerPackage); +} + /* Generate a fake interrupt */ void ml_cause_interrupt(void) { diff --git a/osfmk/i386/machine_routines.h b/osfmk/i386/machine_routines.h index 65e28b742..d800625f7 100644 --- a/osfmk/i386/machine_routines.h +++ b/osfmk/i386/machine_routines.h @@ -312,6 +312,7 @@ boolean_t ml_fpu_avx_enabled(void); void interrupt_latency_tracker_setup(void); void interrupt_reset_latency_stats(void); void interrupt_populate_latency_stats(char *, unsigned); +void ml_get_power_state(boolean_t *, boolean_t *); #endif /* XNU_KERNEL_PRIVATE */ #endif /* _I386_MACHINE_ROUTINES_H_ */ diff --git a/osfmk/i386/pal_routines.h b/osfmk/i386/pal_routines.h index dc59735b9..7e0112fbb 100644 --- a/osfmk/i386/pal_routines.h +++ b/osfmk/i386/pal_routines.h @@ -1,3 +1,4 @@ + /* * Copyright (c) 2009 Apple Inc. All rights reserved. * @@ -154,14 +155,18 @@ void pal_efi_hibernate_prepare(void); /* Include a PAL-specific header, too, for xnu-internal overrides */ #include +extern boolean_t virtualized; +#define PAL_VIRTUALIZED_PROPERTY_VALUE 4 /* Allow for tricky IOKit property matching */ #define PAL_AICPM_PROPERTY_NAME "intel_cpupm_matching" static inline void pal_get_resource_property(const char **property_name, int *property_value) { - *property_name = PAL_AICPM_PROPERTY_NAME; - *property_value = PAL_AICPM_PROPERTY_VALUE; + *property_name = PAL_AICPM_PROPERTY_NAME; + *property_value = PAL_AICPM_PROPERTY_VALUE; + if (virtualized) + *property_value = PAL_VIRTUALIZED_PROPERTY_VALUE; } /* assembly function to update TSC / timebase info */ diff --git a/osfmk/i386/pmCPU.c b/osfmk/i386/pmCPU.c index b22749df7..ec5ae7f78 100644 --- a/osfmk/i386/pmCPU.c +++ b/osfmk/i386/pmCPU.c @@ -76,6 +76,14 @@ power_management_init(void) (*pmDispatch->cstateInit)(); } +#define CPU_ACTIVE_STAT_BIN_1 (500000) +#define CPU_ACTIVE_STAT_BIN_2 (2000000) +#define CPU_ACTIVE_STAT_BIN_3 (5000000) + +#define CPU_IDLE_STAT_BIN_1 
(500000) +#define CPU_IDLE_STAT_BIN_2 (2000000) +#define CPU_IDLE_STAT_BIN_3 (5000000) + /* * Called when the CPU is idle. It calls into the power management kext * to determine the best way to idle the CPU. @@ -84,14 +92,31 @@ void machine_idle(void) { cpu_data_t *my_cpu = current_cpu_datap(); + uint64_t ctime, rtime, itime; if (my_cpu == NULL) goto out; + ctime = mach_absolute_time(); + my_cpu->lcpu.state = LCPU_IDLE; DBGLOG(cpu_handle, cpu_number(), MP_IDLE); MARK_CPU_IDLE(cpu_number()); + rtime = ctime - my_cpu->cpu_ixtime; + + my_cpu->cpu_rtime_total += rtime; + + if (rtime < CPU_ACTIVE_STAT_BIN_1) + my_cpu->cpu_rtimes[0]++; + else if (rtime < CPU_ACTIVE_STAT_BIN_2) + my_cpu->cpu_rtimes[1]++; + else if (rtime < CPU_ACTIVE_STAT_BIN_3) + my_cpu->cpu_rtimes[2]++; + else + my_cpu->cpu_rtimes[3]++; + + if (pmInitDone) { /* * Handle case where ml_set_maxbusdelay() or ml_set_maxintdelay() @@ -129,8 +154,24 @@ machine_idle(void) */ MARK_CPU_ACTIVE(cpu_number()); DBGLOG(cpu_handle, cpu_number(), MP_UNIDLE); + + uint64_t ixtime = my_cpu->cpu_ixtime = mach_absolute_time(); + itime = ixtime - ctime; + my_cpu->lcpu.state = LCPU_RUN; + if (itime < CPU_IDLE_STAT_BIN_1) + my_cpu->cpu_itimes[0]++; + else if (itime < CPU_IDLE_STAT_BIN_2) + my_cpu->cpu_itimes[1]++; + else if (itime < CPU_IDLE_STAT_BIN_3) + my_cpu->cpu_itimes[2]++; + else + my_cpu->cpu_itimes[3]++; + + my_cpu->cpu_itime_total += itime; + + /* * Re-enable interrupts. 
*/ @@ -362,7 +403,7 @@ pmCPUExitHalt(int cpu) kern_return_t pmCPUExitHaltToOff(int cpu) { - kern_return_t rc = KERN_INVALID_ARGUMENT; + kern_return_t rc = KERN_SUCCESS; if (pmInitDone && pmDispatch != NULL @@ -890,3 +931,14 @@ pmsBuild(__unused pmsDef *pd, __unused uint32_t pdsize, { return(KERN_SUCCESS); } + +void machine_track_platform_idle(boolean_t entry) { + cpu_data_t *my_cpu = current_cpu_datap(); + + if (entry) { + (void)__sync_fetch_and_add(&my_cpu->lcpu.package->num_idle, 1); + } + else { + (void)__sync_fetch_and_sub(&my_cpu->lcpu.package->num_idle, 1); + } +} diff --git a/osfmk/i386/proc_reg.h b/osfmk/i386/proc_reg.h index 755be1c69..6438d9372 100644 --- a/osfmk/i386/proc_reg.h +++ b/osfmk/i386/proc_reg.h @@ -535,6 +535,9 @@ __END_DECLS #define MSR_PLATFORM_INFO 0xce +#define MSR_IA32_MPERF 0xE7 +#define MSR_IA32_APERF 0xE8 + #define MSR_PMG_CST_CONFIG_CONTROL 0xe2 #define MSR_IA32_BBL_CR_CTL 0x119 @@ -585,6 +588,14 @@ __END_DECLS #define MSR_IA32_MTRR_FIX4K_F0000 0x26e #define MSR_IA32_MTRR_FIX4K_F8000 0x26f +#define MSR_IA32_PKG_C3_RESIDENCY 0x3F8 +#define MSR_IA32_PKG_C6_RESIDENCY 0x3F9 +#define MSR_IA32_PKG_C7_RESIDENCY 0x3FA + +#define MSR_IA32_CORE_C3_RESIDENCY 0x3FC +#define MSR_IA32_CORE_C6_RESIDENCY 0x3FD +#define MSR_IA32_CORE_C7_RESIDENCY 0x3FE + #define MSR_IA32_MC0_CTL 0x400 #define MSR_IA32_MC0_STATUS 0x401 #define MSR_IA32_MC0_ADDR 0x402 @@ -604,8 +615,9 @@ __END_DECLS #define MSR_IA32_DS_AREA 0x600 -#define MSR_IA32_PACKAGE_POWER_SKU_UNIT 0x606 -#define MSR_IA32_PACKAGE_ENERY_STATUS 0x611 +#define MSR_IA32_PKG_POWER_SKU_UNIT 0x606 +#define MSR_IA32_PKG_C2_RESIDENCY 0x60D +#define MSR_IA32_PKG_ENERGY_STATUS 0x611 #define MSR_IA32_PRIMARY_PLANE_ENERY_STATUS 0x639 #define MSR_IA32_SECONDARY_PLANE_ENERY_STATUS 0x641 #define MSR_IA32_TSC_DEADLINE 0x6e0 diff --git a/osfmk/kern/host.c b/osfmk/kern/host.c index dc55ce790..75b1e5f83 100644 --- a/osfmk/kern/host.c +++ b/osfmk/kern/host.c @@ -438,6 +438,26 @@ MACRO_END return (KERN_SUCCESS); } + 
case HOST_EXPIRED_TASK_INFO: + { + if (*count < TASK_POWER_INFO_COUNT) { + return (KERN_FAILURE); + } + + task_power_info_t tinfo = (task_power_info_t)info; + + tinfo->task_interrupt_wakeups = dead_task_statistics.task_interrupt_wakeups; + tinfo->task_platform_idle_wakeups = dead_task_statistics.task_platform_idle_wakeups; + + tinfo->task_timer_wakeups_bin_1 = dead_task_statistics.task_timer_wakeups_bin_1; + tinfo->task_timer_wakeups_bin_2 = dead_task_statistics.task_timer_wakeups_bin_2; + + tinfo->total_user = dead_task_statistics.total_user_time; + tinfo->total_system = dead_task_statistics.total_system_time; + + return (KERN_SUCCESS); + } + default: return (KERN_INVALID_ARGUMENT); } diff --git a/osfmk/kern/host.h b/osfmk/kern/host.h index 24b052648..59458f00c 100644 --- a/osfmk/kern/host.h +++ b/osfmk/kern/host.h @@ -91,6 +91,17 @@ extern host_data_t realhost; extern vm_extmod_statistics_data_t host_extmod_statistics; +typedef struct { + uint64_t total_user_time; + uint64_t total_system_time; + uint64_t task_interrupt_wakeups; + uint64_t task_platform_idle_wakeups; + uint64_t task_timer_wakeups_bin_1; + uint64_t task_timer_wakeups_bin_2; +} expired_task_statistics_t; + +extern expired_task_statistics_t dead_task_statistics; + #endif /* MACH_KERNEL_PRIVATE */ /* diff --git a/osfmk/kern/machine.h b/osfmk/kern/machine.h index 106a9b41d..6d46f2c6e 100644 --- a/osfmk/kern/machine.h +++ b/osfmk/kern/machine.h @@ -90,6 +90,8 @@ extern thread_t machine_processor_shutdown( extern void machine_idle(void); +extern void machine_track_platform_idle(boolean_t); + extern void machine_signal_idle( processor_t processor); diff --git a/osfmk/kern/processor_data.h b/osfmk/kern/processor_data.h index 7b01b11a4..5a83c61c8 100644 --- a/osfmk/kern/processor_data.h +++ b/osfmk/kern/processor_data.h @@ -87,6 +87,7 @@ struct processor_data { void *free_pages; struct processor_sched_statistics sched_stats; + uint64_t timer_call_ttd; /* current timer call time-to-deadline */ }; typedef 
struct processor_data processor_data_t; diff --git a/osfmk/kern/sched_prim.c b/osfmk/kern/sched_prim.c index 5f4803119..eebc19bf5 100644 --- a/osfmk/kern/sched_prim.c +++ b/osfmk/kern/sched_prim.c @@ -165,6 +165,14 @@ uint32_t sched_fixed_shift; static boolean_t sched_traditional_use_pset_runqueue = FALSE; +/* Defaults for timer deadline profiling */ +#define TIMER_DEADLINE_TRACKING_BIN_1_DEFAULT 2000000 /* Timers with deadlines <= + * 2ms */ +#define TIMER_DEADLINE_TRACKING_BIN_2_DEFAULT 5000000 /* Timers with deadlines + <= 5ms */ +uint64_t timer_deadline_tracking_bin_1; +uint64_t timer_deadline_tracking_bin_2; + __attribute__((always_inline)) static inline run_queue_t runq_for_processor(processor_t processor) { @@ -309,6 +317,9 @@ sched_realtime_init(void) __attribute__((section("__TEXT, initcode"))); static void sched_realtime_timebase_init(void); +static void +sched_timer_deadline_tracking_init(void); + #if defined(CONFIG_SCHED_TRADITIONAL) static void sched_traditional_tick_continue(void); @@ -563,6 +574,7 @@ sched_init(void) SCHED(fairshare_init)(); sched_realtime_init(); ast_init(); + sched_timer_deadline_tracking_init(); SCHED(pset_init)(&pset0); SCHED(processor_init)(master_processor); @@ -858,6 +870,7 @@ thread_unblock( wait_result_t wresult) { boolean_t result = FALSE; + thread_t cthread = current_thread(); /* * Set wait_result. @@ -924,6 +937,43 @@ thread_unblock( thread->computation_metered = 0; thread->reason = AST_NONE; + /* Obtain power-relevant interrupt and "platform-idle exit" statistics. + * We also account for "double hop" thread signaling via + * the thread callout infrastructure. + * DRK: consider removing the callout wakeup counters in the future + * they're present for verification at the moment. 
+ */ + boolean_t aticontext, pidle; + ml_get_power_state(&aticontext, &pidle); + if (__improbable(aticontext)) { + ledger_credit(thread->t_ledger, task_ledgers.interrupt_wakeups, 1); + uint64_t ttd = PROCESSOR_DATA(current_processor(), timer_call_ttd); + if (ttd) { + if (ttd <= timer_deadline_tracking_bin_1) + thread->thread_timer_wakeups_bin_1++; + else + if (ttd <= timer_deadline_tracking_bin_2) + thread->thread_timer_wakeups_bin_2++; + } + if (pidle) { + ledger_credit(thread->t_ledger, task_ledgers.platform_idle_wakeups, 1); + } + } else if (thread_get_tag_internal(cthread) & THREAD_TAG_CALLOUT) { + if (cthread->callout_woken_from_icontext) { + ledger_credit(thread->t_ledger, task_ledgers.interrupt_wakeups, 1); + thread->thread_callout_interrupt_wakeups++; + if (cthread->callout_woken_from_platform_idle) { + ledger_credit(thread->t_ledger, task_ledgers.platform_idle_wakeups, 1); + thread->thread_callout_platform_idle_wakeups++; + } + } + } + + if (thread_get_tag_internal(thread) & THREAD_TAG_CALLOUT) { + thread->callout_woken_from_icontext = aticontext; + thread->callout_woken_from_platform_idle = pidle; + } + /* Event should only be triggered if thread is not already running */ if (result == FALSE) { KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, @@ -4039,8 +4089,12 @@ processor_idle( IDLE_KERNEL_DEBUG_CONSTANT( MACHDBG_CODE(DBG_MACH_SCHED,MACH_IDLE) | DBG_FUNC_NONE, (uintptr_t)thread_tid(thread), rt_runq.count, SCHED(processor_runq_count)(processor), -1, 0); + machine_track_platform_idle(TRUE); + machine_idle(); + machine_track_platform_idle(FALSE); + (void)splsched(); IDLE_KERNEL_DEBUG_CONSTANT( @@ -4506,3 +4560,9 @@ thread_runnable( return ((thread->state & (TH_RUN|TH_WAIT)) == TH_RUN); } #endif /* DEBUG */ + +static void +sched_timer_deadline_tracking_init(void) { + nanoseconds_to_absolutetime(TIMER_DEADLINE_TRACKING_BIN_1_DEFAULT, &timer_deadline_tracking_bin_1); + nanoseconds_to_absolutetime(TIMER_DEADLINE_TRACKING_BIN_2_DEFAULT, &timer_deadline_tracking_bin_2); 
+} diff --git a/osfmk/kern/task.c b/osfmk/kern/task.c index 1378bf415..324195b3d 100644 --- a/osfmk/kern/task.c +++ b/osfmk/kern/task.c @@ -157,8 +157,12 @@ lck_mtx_t task_watch_mtx; zinfo_usage_store_t tasks_tkm_private; zinfo_usage_store_t tasks_tkm_shared; +/* A container to accumulate statistics for expired tasks */ +expired_task_statistics_t dead_task_statistics; +lck_spin_t dead_task_statistics_lock; + static ledger_template_t task_ledger_template = NULL; -struct _task_ledger_indices task_ledgers = {-1, -1, -1, -1, -1}; +struct _task_ledger_indices task_ledgers = {-1, -1, -1, -1, -1, -1, -1}; void init_task_ledgers(void); @@ -292,7 +296,7 @@ task_init(void) vm_map_deallocate(kernel_task->map); kernel_task->map = kernel_map; - + lck_spin_init(&dead_task_statistics_lock, &task_lck_grp, &task_lck_attr); } /* @@ -371,10 +375,15 @@ init_task_ledgers(void) "bytes"); task_ledgers.wired_mem = ledger_entry_add(t, "wired_mem", "physmem", "bytes"); + task_ledgers.platform_idle_wakeups = ledger_entry_add(t, "platform_idle_wakeups", "power", + "count"); + task_ledgers.interrupt_wakeups = ledger_entry_add(t, "interrupt_wakeups", "power", + "count"); if ((task_ledgers.cpu_time < 0) || (task_ledgers.tkm_private < 0) || (task_ledgers.tkm_shared < 0) || (task_ledgers.phys_mem < 0) || - (task_ledgers.wired_mem < 0)) { + (task_ledgers.wired_mem < 0) || (task_ledgers.platform_idle_wakeups < 0) || + (task_ledgers.interrupt_wakeups < 0)) { panic("couldn't create entries for task ledger template"); } @@ -556,6 +565,7 @@ task_create_internal( } bzero(&new_task->extmod_statistics, sizeof(new_task->extmod_statistics)); + new_task->task_timer_wakeups_bin_1 = new_task->task_timer_wakeups_bin_2 = 0; lck_mtx_lock(&tasks_threads_lock); queue_enter(&tasks, new_task, task_t, tasks); @@ -580,7 +590,7 @@ void task_deallocate( task_t task) { - ledger_amount_t credit, debit; + ledger_amount_t credit, debit, interrupt_wakeups, platform_idle_wakeups; if (task == TASK_NULL) return; @@ -607,6 +617,23 
@@ task_deallocate( vm_map_deallocate(task->map); is_release(task->itk_space); + ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups, + &interrupt_wakeups, &debit); + ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups, + &platform_idle_wakeups, &debit); + + /* Accumulate statistics for dead tasks */ + lck_spin_lock(&dead_task_statistics_lock); + dead_task_statistics.total_user_time += task->total_user_time; + dead_task_statistics.total_system_time += task->total_system_time; + + dead_task_statistics.task_interrupt_wakeups += interrupt_wakeups; + dead_task_statistics.task_platform_idle_wakeups += platform_idle_wakeups; + + dead_task_statistics.task_timer_wakeups_bin_1 += task->task_timer_wakeups_bin_1; + dead_task_statistics.task_timer_wakeups_bin_2 += task->task_timer_wakeups_bin_2; + + lck_spin_unlock(&dead_task_statistics_lock); lck_mtx_destroy(&task->lock, &task_lck_grp); #if CONFIG_MACF_MACH @@ -2030,6 +2057,60 @@ task_info( error = task_affinity_info(task, task_info_out, task_info_count); break; } + + case TASK_POWER_INFO: + { + task_power_info_t info; + thread_t thread; + ledger_amount_t tmp; + + if (*task_info_count < TASK_POWER_INFO_COUNT) { + error = KERN_INVALID_ARGUMENT; + break; + } + + info = (task_power_info_t)task_info_out; + + ledger_get_entries(task->ledger, task_ledgers.interrupt_wakeups, + (ledger_amount_t *)&info->task_interrupt_wakeups, &tmp); + ledger_get_entries(task->ledger, task_ledgers.platform_idle_wakeups, + (ledger_amount_t *)&info->task_platform_idle_wakeups, &tmp); + + info->task_timer_wakeups_bin_1 = task->task_timer_wakeups_bin_1; + info->task_timer_wakeups_bin_2 = task->task_timer_wakeups_bin_2; + + info->total_user = task->total_user_time; + info->total_system = task->total_system_time; + + queue_iterate(&task->threads, thread, thread_t, task_threads) { + uint64_t tval; + spl_t x; + + if ((task == kernel_task) && (thread->priority == IDLEPRI) && (thread->sched_pri == IDLEPRI)) + continue; + x = 
splsched(); + thread_lock(thread); + + info->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1; + info->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2; + + tval = timer_grab(&thread->user_timer); + info->total_user += tval; + + tval = timer_grab(&thread->system_timer); + if (thread->precise_user_kernel_time) { + info->total_system += tval; + } else { + /* system_timer may represent either sys or user */ + info->total_user += tval; + } + + thread_unlock(thread); + splx(x); + } + break; + } + default: error = KERN_INVALID_ARGUMENT; } diff --git a/osfmk/kern/task.h b/osfmk/kern/task.h index 7d2c981ce..9b6e4a1f6 100644 --- a/osfmk/kern/task.h +++ b/osfmk/kern/task.h @@ -362,6 +362,10 @@ struct task { vm_extmod_statistics_data_t extmod_statistics; natural_t proc_terminate; /* the process is marked for proc_terminate */ + + /* Statistics accumulated for terminated threads from this task */ + uint32_t task_timer_wakeups_bin_1; + uint32_t task_timer_wakeups_bin_2; }; #define task_lock(task) lck_mtx_lock(&(task)->lock) @@ -555,6 +559,8 @@ struct _task_ledger_indices { int tkm_shared; int phys_mem; int wired_mem; + int platform_idle_wakeups; + int interrupt_wakeups; }; extern struct _task_ledger_indices task_ledgers; diff --git a/osfmk/kern/thread.c b/osfmk/kern/thread.c index 2738d3850..92840195a 100644 --- a/osfmk/kern/thread.c +++ b/osfmk/kern/thread.c @@ -506,6 +506,8 @@ thread_terminate_daemon(void) task->syscalls_unix += thread->syscalls_unix; task->syscalls_mach += thread->syscalls_mach; + task->task_timer_wakeups_bin_1 += thread->thread_timer_wakeups_bin_1; + task->task_timer_wakeups_bin_2 += thread->thread_timer_wakeups_bin_2; queue_remove(&task->threads, thread, thread_t, task_threads); task->thread_count--; @@ -1701,6 +1703,15 @@ thread_tid( return (thread != THREAD_NULL? 
thread->thread_id: 0); } +uint16_t +thread_set_tag(thread_t th, uint16_t tag) { + return thread_set_tag_internal(th, tag); +} +uint16_t +thread_get_tag(thread_t th) { + return thread_get_tag_internal(th); +} + uint64_t thread_dispatchqaddr( thread_t thread) diff --git a/osfmk/kern/thread.h b/osfmk/kern/thread.h index b497fa3fa..89ac1937b 100644 --- a/osfmk/kern/thread.h +++ b/osfmk/kern/thread.h @@ -149,6 +149,7 @@ struct thread { #define TH_OPT_SYSTEM_CRITICAL 0x10 /* Thread must always be allowed to run - even under heavy load */ #define TH_OPT_PROC_CPULIMIT 0x20 /* Thread has a task-wide CPU limit applied to it */ #define TH_OPT_PRVT_CPULIMIT 0x40 /* Thread has a thread-private CPU limit applied to it */ +#define TH_OPT_IDLE_THREAD 0x0080 /* Thread is a per-processor idle thread */ /* Data updated during assert_wait/thread_wakeup */ decl_simple_lock_data(,sched_lock) /* scheduling lock (thread_lock()) */ @@ -427,6 +428,15 @@ struct thread { task_watch_t * taskwatch; /* task watch */ integer_t saved_importance; /* saved task-relative importance */ #endif /* CONFIG_EMBEDDED */ + uint32_t thread_callout_interrupt_wakeups; + uint32_t thread_callout_platform_idle_wakeups; + uint32_t thread_timer_wakeups_bin_1; + uint32_t thread_timer_wakeups_bin_2; + uint16_t thread_tag; + uint16_t callout_woken_from_icontext:1, + callout_woken_from_platform_idle:1, + thread_bitfield_unused:14; + }; #define ith_state saved.receive.state @@ -645,6 +655,13 @@ extern void funnel_lock( extern void funnel_unlock( struct funnel_lock *lock); +static inline uint16_t thread_set_tag_internal(thread_t thread, uint16_t tag) { + return __sync_fetch_and_or(&thread->thread_tag, tag); +} +static inline uint16_t thread_get_tag_internal(thread_t thread) { + return thread->thread_tag; +} + #else /* MACH_KERNEL_PRIVATE */ __BEGIN_DECLS @@ -687,6 +704,16 @@ __BEGIN_DECLS #ifdef XNU_KERNEL_PRIVATE +/* + * Thread tags; for easy identification. 
+ */ +#define THREAD_TAG_MAINTHREAD 0x1 +#define THREAD_TAG_CALLOUT 0x2 +#define THREAD_TAG_IOWORKLOOP 0x4 + +uint16_t thread_set_tag(thread_t, uint16_t); +uint16_t thread_get_tag(thread_t); + extern kern_return_t thread_state_initialize( thread_t thread); diff --git a/osfmk/kern/thread_call.c b/osfmk/kern/thread_call.c index 7d43919ae..36cb66a8e 100644 --- a/osfmk/kern/thread_call.c +++ b/osfmk/kern/thread_call.c @@ -46,7 +46,9 @@ #include #include - +#if CONFIG_DTRACE +#include +#endif static zone_t thread_call_zone; static struct wait_queue daemon_wqueue; @@ -889,11 +891,13 @@ thread_call_enter1_delayed( boolean_t result = TRUE; thread_call_group_t group; spl_t s; + uint64_t abstime; group = thread_call_get_group(call); s = splsched(); thread_call_lock_spin(); + abstime = mach_absolute_time(); result = _delayed_call_enqueue(call, group, deadline); @@ -902,6 +906,10 @@ thread_call_enter1_delayed( call->tc_call.param1 = param1; + call->ttd = (deadline > abstime) ? (deadline - abstime) : 0; +#if CONFIG_DTRACE + DTRACE_TMR4(thread_callout__create, thread_call_func_t, call->tc_call.func, 0, (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF)); +#endif thread_call_unlock(); splx(s); @@ -933,6 +941,9 @@ thread_call_cancel( thread_call_unlock(); splx(s); +#if CONFIG_DTRACE + DTRACE_TMR4(thread_callout__cancel, thread_call_func_t, call->tc_call.func, 0, (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF)); +#endif return (result); } @@ -1135,6 +1146,9 @@ thread_call_thread( thread_t self = current_thread(); boolean_t canwait; + if ((thread_get_tag_internal(self) & THREAD_TAG_CALLOUT) == 0) + (void)thread_set_tag_internal(self, THREAD_TAG_CALLOUT); + /* * A wakeup with THREAD_INTERRUPTED indicates that * we should terminate. 
diff --git a/osfmk/kern/thread_call.h b/osfmk/kern/thread_call.h index e2836e293..e1ddebc36 100644 --- a/osfmk/kern/thread_call.h +++ b/osfmk/kern/thread_call.h @@ -228,6 +228,8 @@ struct thread_call { uint32_t tc_flags; int32_t tc_refs; + + uint64_t ttd; /* Time to deadline at creation */ }; #define THREAD_CALL_ALLOC 0x01 diff --git a/osfmk/kern/timer_call.c b/osfmk/kern/timer_call.c index 0d737dbbb..5a17e057c 100644 --- a/osfmk/kern/timer_call.c +++ b/osfmk/kern/timer_call.c @@ -40,7 +40,7 @@ #include -#if CONFIG_DTRACE && (DEVELOPMENT || DEBUG ) +#if CONFIG_DTRACE #include #endif @@ -358,6 +358,15 @@ timer_call_enter_internal( call->soft_deadline = deadline; } #endif + call->ttd = call->soft_deadline - ctime; + +#if CONFIG_DTRACE + DTRACE_TMR6(callout__create, timer_call_func_t, CE(call)->func, + timer_call_param_t, CE(call)->param0, uint32_t, call->flags, + (deadline - call->soft_deadline), + (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF)); +#endif + queue = timer_queue_assign(deadline); old_queue = timer_call_enqueue_deadline_unlocked(call, queue, deadline); @@ -409,6 +418,12 @@ timer_call_cancel( } splx(s); +#if CONFIG_DTRACE + DTRACE_TMR6(callout__cancel, timer_call_func_t, CE(call)->func, + timer_call_param_t, CE(call)->param0, uint32_t, call->flags, 0, + (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF)); +#endif + return (old_queue != NULL); } @@ -499,18 +514,25 @@ timer_queue_expire( DECR_TIMER_CALLOUT | DBG_FUNC_START, VM_KERNEL_UNSLIDE(func), param0, param1, 0, 0); -#if CONFIG_DTRACE && (DEVELOPMENT || DEBUG ) - DTRACE_TMR3(callout__start, timer_call_func_t, func, - timer_call_param_t, param0, - timer_call_param_t, param1); +#if CONFIG_DTRACE + DTRACE_TMR6(callout__start, timer_call_func_t, func, + timer_call_param_t, param0, unsigned, call->flags, + 0, (call->ttd >> 32), + (unsigned) (call->ttd & 0xFFFFFFFF)); #endif + /* Maintain time-to-deadline in per-processor data + * structure for thread wakeup deadline statistics. 
+ */ + uint64_t *ttdp = &(PROCESSOR_DATA(current_processor(), timer_call_ttd)); + *ttdp = call->ttd; (*func)(param0, param1); + *ttdp = 0; -#if CONFIG_DTRACE && (DEVELOPMENT || DEBUG ) - DTRACE_TMR3(callout__end, timer_call_func_t, func, - timer_call_param_t, param0, - timer_call_param_t, param1); +#if CONFIG_DTRACE + DTRACE_TMR3(callout__end, timer_call_func_t, func, + timer_call_param_t, param0, timer_call_param_t, + param1); #endif KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, diff --git a/osfmk/kern/timer_call.h b/osfmk/kern/timer_call.h index f2a074d39..1f9370b1e 100644 --- a/osfmk/kern/timer_call.h +++ b/osfmk/kern/timer_call.h @@ -49,6 +49,7 @@ typedef struct timer_call { uint32_t flags; boolean_t async_dequeue; /* this field is protected by call_entry queue's lock */ + uint64_t ttd; /* Time to deadline at creation */ } *timer_call_t; typedef void *timer_call_param_t; diff --git a/osfmk/mach/host_info.h b/osfmk/mach/host_info.h index d90f3be37..0464d5ed0 100644 --- a/osfmk/mach/host_info.h +++ b/osfmk/mach/host_info.h @@ -185,7 +185,7 @@ typedef struct host_priority_info *host_priority_info_t; /* host_statistics64() */ #define HOST_VM_INFO64 4 /* 64-bit virtual memory stats */ #define HOST_EXTMOD_INFO64 5 /* External modification stats */ - +#define HOST_EXPIRED_TASK_INFO 6 /* Statistics for expired tasks */ struct host_load_info { integer_t avenrun[3]; /* scaled by LOAD_SCALE */ diff --git a/osfmk/mach/machine/sdt.h b/osfmk/mach/machine/sdt.h index 551f2b0fc..cf99fbd09 100644 --- a/osfmk/mach/machine/sdt.h +++ b/osfmk/mach/machine/sdt.h @@ -208,6 +208,15 @@ #define DTRACE_TMR3(name, type1, arg1, type2, arg2, type3, arg3) \ DTRACE_PROBE3(__sdt_, name, arg1, arg2, arg3); +#define DTRACE_TMR4(name, type1, arg1, arg2, arg3, arg4) \ + DTRACE_PROBE4(__sdt_, name, arg1, arg2, arg3, arg4); + +#define DTRACE_TMR5(name, type1, arg1, type2, arg2, type3, arg3, arg4, arg5) \ + DTRACE_PROBE5(__sdt_, name, arg1, arg2, arg3, arg4, arg5); + +#define DTRACE_TMR6(name, type1, arg1, 
type2, arg2, type3, arg3, arg4, arg5, arg6) \ + DTRACE_PROBE6(__sdt_, name, arg1, arg2, arg3, arg4, arg5, arg6); + #define DTRACE_VM(name) \ DTRACE_PROBE(__vminfo_, name) diff --git a/osfmk/mach/task_info.h b/osfmk/mach/task_info.h index d115f3826..8a24624c0 100644 --- a/osfmk/mach/task_info.h +++ b/osfmk/mach/task_info.h @@ -264,6 +264,20 @@ typedef struct task_extmod_info *task_extmod_info_t; /* Always 64-bit in user and kernel */ #define MACH_TASK_BASIC_INFO 20 /* always 64-bit basic info */ +#define TASK_POWER_INFO 21 +struct task_power_info { + uint64_t total_user; + uint64_t total_system; + uint64_t task_interrupt_wakeups; + uint64_t task_platform_idle_wakeups; + uint64_t task_timer_wakeups_bin_1; + uint64_t task_timer_wakeups_bin_2; +}; +typedef struct task_power_info task_power_info_data_t; +typedef struct task_power_info *task_power_info_t; +#define TASK_POWER_INFO_COUNT ((mach_msg_type_number_t) \ + (sizeof (task_power_info_data_t) / sizeof (natural_t))) + struct mach_task_basic_info { mach_vm_size_t virtual_size; /* virtual memory size (bytes) */ mach_vm_size_t resident_size; /* resident memory size (bytes) */ diff --git a/security/mac_framework.h b/security/mac_framework.h index 7d0f15a10..7c91a2469 100644 --- a/security/mac_framework.h +++ b/security/mac_framework.h @@ -548,6 +548,7 @@ int mac_vnode_notify_create(vfs_context_t ctx, struct mount *mp, struct vnode *dvp, struct vnode *vp, struct componentname *cnp); void mac_vnode_notify_rename(vfs_context_t ctx, struct vnode *vp, struct vnode *dvp, struct componentname *cnp); +void mac_vnode_notify_open(vfs_context_t ctx, struct vnode *vp, int acc_flags); int vnode_label(struct mount *mp, struct vnode *dvp, struct vnode *vp, struct componentname *cnp, int flags, vfs_context_t ctx); void vnode_relabel(struct vnode *vp); diff --git a/security/mac_policy.h b/security/mac_policy.h index 914393813..a5c13843e 100644 --- a/security/mac_policy.h +++ b/security/mac_policy.h @@ -6008,6 +6008,23 @@ typedef int 
mpo_vnode_notify_create_t( struct componentname *cnp ); +/** + @brief Inform MAC policies that a vnode has been opened + @param cred User credential for the opening process + @param vp vnode opened + @param label Policy label for the vp + @param acc_mode open(2) access mode used + + Inform MAC policies that a vnode has been successfully opened + (passing all MAC policies and DAC). +*/ +typedef void mpo_vnode_notify_open_t( + kauth_cred_t cred, + struct vnode *vp, + struct label *label, + int acc_mode +); + /** @brief Inform MAC policies that a vnode has been renamed @param cred User credential for the renaming process @@ -6362,7 +6379,7 @@ struct mac_policy_ops { mpo_thread_label_destroy_t *mpo_thread_label_destroy; mpo_system_check_kas_info_t *mpo_system_check_kas_info; mpo_reserved_hook_t *mpo_reserved18; - mpo_reserved_hook_t *mpo_reserved19; + mpo_vnode_notify_open_t *mpo_vnode_notify_open; mpo_reserved_hook_t *mpo_reserved20; mpo_reserved_hook_t *mpo_reserved21; mpo_reserved_hook_t *mpo_reserved22; diff --git a/security/mac_vfs.c b/security/mac_vfs.c index ba8e50fce..7bf550622 100644 --- a/security/mac_vfs.c +++ b/security/mac_vfs.c @@ -392,6 +392,19 @@ mac_vnode_notify_rename(vfs_context_t ctx, struct vnode *vp, dvp, dvp->v_label, cnp); } +void +mac_vnode_notify_open(vfs_context_t ctx, struct vnode *vp, int acc_flags) +{ + kauth_cred_t cred; + + if (!mac_vnode_enforce || + !mac_context_check_enforce(ctx, MAC_VNODE_ENFORCE)) + return; + + cred = vfs_context_ucred(ctx); + MAC_PERFORM(vnode_notify_open, cred, vp, vp->v_label, acc_flags); +} + /* * Extended attribute 'name' was updated via * vn_setxattr() or vn_removexattr(). Allow the